Skip to content
This repository has been archived by the owner on Sep 23, 2024. It is now read-only.

Adding API for signed BC4/BC5 variants #32

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ISPC Texture Compressor/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,9 @@ int WINAPI wWinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdL
{
CDXUTComboBox *comboBox = gSampleUI.GetComboBox(IDC_PROFILE);
comboBox->AddItem(L"BC4 (R)", (void*)(CompressImageBC4));
comboBox->AddItem(L"BC4S (R)", (void*)(CompressImageBC4S));
comboBox->AddItem(L"BC5 (RG)", (void*)(CompressImageBC5));
comboBox->AddItem(L"BC5S (RG)", (void*)(CompressImageBC5S));
comboBox->AddItem(L"BC6H veryfast", (void *)(CompressImageBC6H_veryfast));
comboBox->AddItem(L"BC6H fast", (void *)(CompressImageBC6H_fast));
comboBox->AddItem(L"BC6H basic", (void *)(CompressImageBC6H_basic));
Expand Down
20 changes: 18 additions & 2 deletions ISPC Texture Compressor/processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,8 @@ static inline DXGI_FORMAT GetNonSRGBFormat(DXGI_FORMAT f) {
case DXGI_FORMAT_BC3_UNORM_SRGB: return DXGI_FORMAT_BC3_UNORM;
case DXGI_FORMAT_BC4_UNORM: return DXGI_FORMAT_BC4_UNORM;
case DXGI_FORMAT_BC5_UNORM: return DXGI_FORMAT_BC5_UNORM;
case DXGI_FORMAT_BC4_SNORM: return DXGI_FORMAT_BC4_SNORM;
case DXGI_FORMAT_BC5_SNORM: return DXGI_FORMAT_BC5_SNORM;
case DXGI_FORMAT_BC7_UNORM_SRGB: return DXGI_FORMAT_BC7_UNORM;
case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: return DXGI_FORMAT_R8G8B8A8_UNORM;
default: assert(!"Unknown format!");
Expand Down Expand Up @@ -1119,10 +1121,12 @@ int GetBytesPerBlock(CompressionFunc* fn)
default:
case DXGI_FORMAT_BC1_UNORM_SRGB:
case DXGI_FORMAT_BC4_UNORM:
case DXGI_FORMAT_BC4_SNORM:
return 8;

case DXGI_FORMAT_BC3_UNORM_SRGB:
case DXGI_FORMAT_BC5_UNORM:
case DXGI_FORMAT_BC5_SNORM:
case DXGI_FORMAT_BC7_UNORM_SRGB:
case DXGI_FORMAT_BC6H_UF16:
return 16;
Expand All @@ -1131,12 +1135,12 @@ int GetBytesPerBlock(CompressionFunc* fn)

// Returns true when fn is one of the BC4 compression entry points:
// the unsigned CompressImageBC4 or the signed CompressImageBC4S.
bool IsBC4(CompressionFunc* fn)
{
    // A single return covering both variants; an earlier unsigned-only return
    // ahead of this check would make the signed comparison unreachable.
    return fn == CompressImageBC4 || fn == CompressImageBC4S;
}

// Returns true when fn is one of the BC5 compression entry points:
// the unsigned CompressImageBC5 or the signed CompressImageBC5S.
bool IsBC5(CompressionFunc* fn)
{
    // A single return covering both variants; an earlier unsigned-only return
    // ahead of this check would make the signed comparison unreachable.
    return fn == CompressImageBC5 || fn == CompressImageBC5S;
}

bool IsBC6H(CompressionFunc* fn)
Expand All @@ -1155,6 +1159,8 @@ DXGI_FORMAT GetFormatFromCompressionFunc(CompressionFunc* fn)
if (fn == CompressImageBC3) return DXGI_FORMAT_BC3_UNORM_SRGB;
if (fn == CompressImageBC4) return DXGI_FORMAT_BC4_UNORM;
if (fn == CompressImageBC5) return DXGI_FORMAT_BC5_UNORM;
if (fn == CompressImageBC4S) return DXGI_FORMAT_BC4_SNORM;
if (fn == CompressImageBC5S) return DXGI_FORMAT_BC5_SNORM;

if (IsBC6H(fn)) return DXGI_FORMAT_BC6H_UF16;

Expand All @@ -1181,6 +1187,16 @@ void CompressImageBC5(const rgba_surface* input, BYTE* output)
CompressBlocksBC5(input, output);
}

// Signed BC4 compression entry point: forwards the input surface and the
// output buffer unchanged to CompressBlocksBC4S.
void CompressImageBC4S(const rgba_surface* input, BYTE* output)
{
CompressBlocksBC4S(input, output);
}

// Signed BC5 compression entry point: forwards the input surface and the
// output buffer unchanged to CompressBlocksBC5S.
void CompressImageBC5S(const rgba_surface* input, BYTE* output)
{
CompressBlocksBC5S(input, output);
}

#define DECLARE_CompressImageBC6H_profile(profile) \
void CompressImageBC6H_ ## profile(const rgba_surface* input, BYTE* output) \
{ \
Expand Down
2 changes: 2 additions & 0 deletions ISPC Texture Compressor/processing.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ void CompressImageBC1(const rgba_surface* input, BYTE* output);
void CompressImageBC3(const rgba_surface* input, BYTE* output);
void CompressImageBC4(const rgba_surface* input, BYTE* output);
void CompressImageBC5(const rgba_surface* input, BYTE* output);
void CompressImageBC4S(const rgba_surface* input, BYTE* output);
void CompressImageBC5S(const rgba_surface* input, BYTE* output);
void CompressImageBC6H_veryfast(const rgba_surface* input, BYTE* output);
void CompressImageBC6H_fast(const rgba_surface* input, BYTE* output);
void CompressImageBC6H_basic(const rgba_surface* input, BYTE* output);
Expand Down
11 changes: 11 additions & 0 deletions ispc_texcomp/ispc_texcomp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,11 +461,22 @@ void CompressBlocksBC4(const rgba_surface* src, uint8_t* dst)
ispc::CompressBlocksBC4_ispc((ispc::rgba_surface*)src, dst);
}

void CompressBlocksBC4S(const rgba_surface* src, uint8_t* dst)
{
ispc::CompressBlocksBC4S_ispc((ispc::rgba_surface*)src, dst);
}

void CompressBlocksBC5(const rgba_surface* src, uint8_t* dst)
{
ispc::CompressBlocksBC5_ispc((ispc::rgba_surface*)src, dst);
}

void CompressBlocksBC5S(const rgba_surface* src, uint8_t* dst)
{
ispc::CompressBlocksBC5S_ispc((ispc::rgba_surface*)src, dst);
}


void CompressBlocksBC7(const rgba_surface* src, uint8_t* dst, bc7_enc_settings* settings)
{
ispc::CompressBlocksBC7_ispc((ispc::rgba_surface*)src, dst, (ispc::bc7_enc_settings*)settings);
Expand Down
6 changes: 4 additions & 2 deletions ispc_texcomp/ispc_texcomp.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ extern "C" void ReplicateBorders(rgba_surface* dst_slice, const rgba_surface* sr
- LDR input is 32 bit/pixel (sRGB), HDR is 64 bit/pixel (half float)
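- for BC4/BC4S input is 8bit/pixel (R8), for BC5/BC5S input is 16bit/pixel (RG8); the signed (S) variants take the same 8-bit samples and subtract 128 from the endpoints when encoding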
- dst buffer must be allocated with enough space for the compressed texture:
- 8 bytes/block for BC1/BC4/BC4S/ETC1,
- 16 bytes/block for BC3/BC5/BC5S/BC6H/BC7/ASTC
- the blocks are stored in raster scan order (natural CPU texture layout)
- use the GetProfile_* functions to select various speed/quality tradeoffs
- the RGB profiles are slightly faster as they ignore the alpha channel
Expand All @@ -118,7 +118,9 @@ extern "C" void ReplicateBorders(rgba_surface* dst_slice, const rgba_surface* sr
extern "C" void CompressBlocksBC1(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC3(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC4(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC4S(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC5(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC5S(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC6H(const rgba_surface* src, uint8_t* dst, bc6h_enc_settings* settings);
extern "C" void CompressBlocksBC7(const rgba_surface* src, uint8_t* dst, bc7_enc_settings* settings);
extern "C" void CompressBlocksETC1(const rgba_surface* src, uint8_t* dst, etc_enc_settings* settings);
Expand Down
84 changes: 84 additions & 0 deletions ispc_texcomp/kernel.ispc
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,47 @@ inline void CompressBlockBC3_alpha(float block[16], uint32 data[2])
data[1] |= qblock[1]<<8;
}

// Encodes 16 samples into one signed BC4 block (two 32-bit words in `data`).
//   block: the 16 input samples. The {255, 0} min/max seeds and the -0x80
//          rebias below suggest values in [0, 255] -- TODO confirm against
//          the loaders used by the callers.
//   data:  output; data[0] = two endpoint bytes + the low 16 index bits,
//          data[1] = the remaining 32 index bits.
inline void CompressBlockBC4Signed(float block[16], uint32 data[2])
{
// ep[0] accumulates the minimum, ep[1] the maximum of the block.
float ep[2] = { 255, 0 };

for (uniform int k=0; k<16; k++)
{
ep[0] = min(ep[0], block[k]);
ep[1] = max(ep[1], block[k]);
}

// Flat block: nudge the max so the scale below never divides by zero.
if (ep[0] == ep[1]) ep[1] = ep[0]+0.1f;

// 16 indices x 3 bits = 48 bits, stored as 8 indices (24 bits) per word.
uint32 qblock[2] = { 0, 0 };
// Maps the [min, max] range onto the 8 quantization steps 0..7.
float scale = 7f/(ep[1]-ep[0]);

for (uniform int k=0; k<16; k++)
{
float v = block[k];
// Adding 0.5f before the (int) truncation below rounds to the nearest step.
float proj = (v-ep[0])*scale+0.5f;

int q = clamp((int)proj, 0, 7);

// Reverse the order so that 0 is the max end and 7 is the min end.
q = 7-q;

// Remap to the stored index order: 0 stays the max endpoint, the min
// endpoint (7 -> 8) wraps to index 1, and the steps in between become 2..7.
if (q > 0) q++;
if (q==8) q = 1;

// Append this sample's 3-bit index to the word holding its group of 8.
qblock[k/8] |= q << ((k%8)*3);
}

// (could be improved by refinement)

// Rebias both endpoints by -128 (0x80) and clamp to the signed 8-bit range.
// NOTE(review): -128 is kept as a possible endpoint value; confirm how the
// target decoders treat -128 versus -127.
for (uniform int e=0; e<2; e++) ep[e] = clamp((int)ep[e] - 0x80, -128, 127);

// Byte 0 = max endpoint, byte 1 = min endpoint (each masked to 8 bits),
// followed by the low 16 bits of the first index word.
data[0] = (0xFF & (int)ep[0]) << 8;
data[0] |= (0xFF & (int)ep[1]);
data[0] |= qblock[0]<<16;
// Remaining 8 bits of the first index word, then the 24 bits of the second.
data[1] = qblock[0]>>16;
data[1] |= qblock[1]<<8;
}

inline void CompressBlockBC1(uniform rgba_surface src[], int xx, uniform int yy, uniform uint8 dst[])
{
float block[48];
Expand Down Expand Up @@ -658,6 +699,18 @@ inline void CompressBlockBC4(uniform rgba_surface src[], int xx, uniform int yy,
store_data(dst, src->width, xx, yy, data, 2);
}

// Signed BC4 for one block: loads 16 samples with load_block_r_8bit, encodes
// them with CompressBlockBC4Signed and writes the two resulting 32-bit words
// through store_data.
inline void CompressBlockBC4S(uniform rgba_surface src[], int xx, uniform int yy, uniform uint8 dst[])
{
    float texels[16];
    load_block_r_8bit(texels, src, xx, yy);

    // One encoded block = 2 x 32-bit words.
    uint32 encoded[2];
    CompressBlockBC4Signed(texels, encoded);

    store_data(dst, src->width, xx, yy, encoded, 2);
}

inline void CompressBlockBC5(uniform rgba_surface src[], int xx, uniform int yy, uniform uint8 dst[])
{
float block[32];
Expand All @@ -671,6 +724,19 @@ inline void CompressBlockBC5(uniform rgba_surface src[], int xx, uniform int yy,
store_data(dst, src->width, xx, yy, data, 4);
}

// Signed BC5 for one block: loads 32 samples with
// load_block_interleaved_rg_8bit, encodes each group of 16 separately with
// CompressBlockBC4Signed and writes the four resulting 32-bit words through
// store_data.
inline void CompressBlockBC5S(uniform rgba_surface src[], int xx, uniform int yy, uniform uint8 dst[])
{
    float texels[32];
    load_block_interleaved_rg_8bit(texels, src, xx, yy);

    // Two encoded halves of 2 x 32-bit words each.
    uint32 encoded[4];
    CompressBlockBC4Signed(texels, encoded);             // samples 0..15  -> words 0..1
    CompressBlockBC4Signed(&texels[16], &encoded[2]);    // samples 16..31 -> words 2..3

    store_data(dst, src->width, xx, yy, encoded, 4);
}

export void CompressBlocksBC1_ispc(uniform rgba_surface src[], uniform uint8 dst[])
{
for (uniform int yy = 0; yy<src->height/4; yy++)
Expand Down Expand Up @@ -698,6 +764,15 @@ export void CompressBlocksBC4_ispc(uniform rgba_surface src[], uniform uint8 dst
}
}

// Exported kernel: runs CompressBlockBC4S for every block position, with
// height/4 rows visited serially and width/4 columns spread over the
// program instances by foreach.
export void CompressBlocksBC4S_ispc(uniform rgba_surface src[], uniform uint8 dst[])
{
    uniform int blockRows = src->height/4;
    uniform int blockCols = src->width/4;

    for (uniform int blockY = 0; blockY < blockRows; blockY++)
    {
        foreach (blockX = 0 ... blockCols)
        {
            CompressBlockBC4S(src, blockX, blockY, dst);
        }
    }
}

export void CompressBlocksBC5_ispc(uniform rgba_surface src[], uniform uint8 dst[])
{
for (uniform int yy = 0; yy<src->height/4; yy++)
Expand All @@ -707,6 +782,15 @@ export void CompressBlocksBC5_ispc(uniform rgba_surface src[], uniform uint8 dst
}
}

// Exported kernel: runs CompressBlockBC5S for every block position, with
// height/4 rows visited serially and width/4 columns spread over the
// program instances by foreach.
export void CompressBlocksBC5S_ispc(uniform rgba_surface src[], uniform uint8 dst[])
{
    uniform int blockRows = src->height/4;
    uniform int blockCols = src->width/4;

    for (uniform int blockY = 0; blockY < blockRows; blockY++)
    {
        foreach (blockX = 0 ... blockCols)
        {
            CompressBlockBC5S(src, blockX, blockY, dst);
        }
    }
}

///////////////////////////////////////////////////////////
// BC7 encoding

Expand Down
3 changes: 2 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ formats:
* BC7
* ASTC (LDR, block sizes up to 8x8)
* ETC1
* BC1, BC3 (aka DXT1, DXT5)
* BC4, BC5 (aka ATI1N, ATI2N), both UNORM and SNORM variants

The library uses the [ISPC compiler](https://ispc.github.io/) to generate CPU
SIMD-optimized compression algorithms. For more information, see the [Fast ISPC
Expand Down