Small change: force emission of constant shifts in place of multiplications by powers of two

This commit is contained in:
lizzie 2026-01-30 07:37:11 +00:00 committed by crueter
parent 88eb1aab39
commit 89fc2b94e6

View file

@ -165,7 +165,7 @@ const uint mod8_table = 0
// Assumes num_bits < to_bit, num_bits and to_bit != 0 // Assumes num_bits < to_bit, num_bits and to_bit != 0
uint ReplicateBits(uint value, uint num_bits, uint to_bit, uint table) { uint ReplicateBits(uint value, uint num_bits, uint to_bit, uint table) {
const uint repl = value & ((1 << num_bits) - 1); const uint repl = value & ((1 << num_bits) - 1);
const uint shift = (table >> (num_bits * 2)) & 3; const uint shift = (table >> (num_bits << 1)) & 3;
uint v = repl; uint v = repl;
v |= v << (num_bits << 0); // [ xxxx xxrr ] v |= v << (num_bits << 0); // [ xxxx xxrr ]
v |= v << (num_bits << 1); // [ xxxx rrrr ] v |= v << (num_bits << 1); // [ xxxx rrrr ]
@ -266,7 +266,7 @@ uint GetBitLength(uint n_vals, uint encoding_index) {
const uint num_bits = NumBits(encoding_value); const uint num_bits = NumBits(encoding_value);
const uvec3 div_constant = uvec3(0, 0x5556, 0x3334); const uvec3 div_constant = uvec3(0, 0x5556, 0x3334);
return num_bits * n_vals return num_bits * n_vals
+ ((((n_vals * ((0x870 >> (encoding * 4)) & 0xf)) + ((0x420 >> (encoding * 4)) & 0xf)) + ((((n_vals * ((0x870 >> (encoding << 2)) & 0xf)) + ((0x420 >> (encoding << 2)) & 0xf))
* div_constant[encoding]) >> 16); * div_constant[encoding]) >> 16);
} }
@ -647,19 +647,19 @@ uint UnquantizeTexelWeight(EncodingData val) {
: FastReplicateTo6(bitval, bitlen); : FastReplicateTo6(bitval, bitlen);
} else if (encoding == TRIT || encoding == QUINT) { } else if (encoding == TRIT || encoding == QUINT) {
uint B = 0, C = 0, D = 0; uint B = 0, C = 0, D = 0;
uint b_mask = (0x3100 >> (bitlen * 4)) & 0xf; uint b_mask = (0x3100 >> (bitlen << 2)) & 0xf;
uint b = (bitval >> 1) & b_mask; uint b = (bitval >> 1) & b_mask;
D = QuintTritValue(val); D = QuintTritValue(val);
if (encoding == TRIT) { if (encoding == TRIT) {
switch (bitlen) { switch (bitlen) {
case 0: return D * 32; //0,32,64 case 0: return D << 5; //0,32,64
case 1: C = 50; break; case 1: C = 50; break;
case 2: C = 23; B = (b << 6) | (b << 2) | b; break; case 2: C = 23; B = (b << 6) | (b << 2) | b; break;
case 3: C = 11; B = (b << 5) | b; break; case 3: C = 11; B = (b << 5) | b; break;
} }
} else if (encoding == QUINT) { } else if (encoding == QUINT) {
switch (bitlen) { switch (bitlen) {
case 0: return D * 16; //0, 16, 32, 48, 64 case 0: return D << 4; //0, 16, 32, 48, 64
case 1: C = 28; break; case 1: C = 28; break;
case 2: C = 13; B = (b << 6) | (b << 1); break; case 2: C = 13; B = (b << 6) | (b << 1); break;
} }
@ -681,7 +681,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
} }
// Looks up a single entry of result_vector and returns it.
// When is_dual_plane is true the index is (offset_base * 2) + plane; otherwise the
// index is offset_base and plane is not used. The two spellings of the doubling below
// (`2 * offset_base` and `offset_base << 1`) yield the same value for a uint.
// NOTE(review): presumably the two planes' weights are stored interleaved per texel in
// result_vector -- confirm against UnquantizeTexelWeights, which is not fully visible here.
uint GetUnquantizedTexelWeight(uint offset_base, uint plane, bool is_dual_plane) { uint GetUnquantizedTexelWeight(uint offset_base, uint plane, bool is_dual_plane) {
const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; const uint offset = is_dual_plane ? (offset_base << 1) + plane : offset_base;
return result_vector[offset]; return result_vector[offset];
} }
@ -812,7 +812,7 @@ int FindLayout(uint mode) {
| ((3) << (7 * 4)) //01a0 -> 7, 3 + 5 = 8 | ((3) << (7 * 4)) //01a0 -> 7, 3 + 5 = 8
; ;
const uint if_mode3_t = sh3_mode + uint((mode & 0x10c) == 0x10c); const uint if_mode3_t = sh3_mode + uint((mode & 0x10c) == 0x10c);
const uint if_mode3_f = 5 + ((fl_const_table >> (sh0_mode * 4)) & 7); const uint if_mode3_f = 5 + ((fl_const_table >> (sh0_mode << 2)) & 7);
return int((if_mode3_t & mask) | (if_mode3_f & ~mask)); return int((if_mode3_t & mask) | (if_mode3_f & ~mask));
} }
@ -902,7 +902,7 @@ void DecompressBlock(ivec3 coord) {
const uint base_mode = base_cem & 3; const uint base_mode = base_cem & 3;
const uint max_weight = DecodeMaxWeight(mode_layout, mode); const uint max_weight = DecodeMaxWeight(mode_layout, mode);
const uint weight_bits = GetPackedBitSize(size_params, dual_plane, max_weight); const uint weight_bits = GetPackedBitSize(size_params, dual_plane, max_weight);
const uint extra_cem_bits = base_mode > 0 ? ((0x85200 >> (num_partitions * 4)) & 0x0f) : 0; const uint extra_cem_bits = base_mode > 0 ? ((0x85200 >> (num_partitions << 2)) & 0x0f) : 0;
const uint plane_selector_bits = dual_plane ? 2 : 0; const uint plane_selector_bits = dual_plane ? 2 : 0;
uint remaining_bits = 128 - weight_bits - total_bitsread; uint remaining_bits = 128 - weight_bits - total_bitsread;
remaining_bits -= extra_cem_bits; remaining_bits -= extra_cem_bits;
@ -928,7 +928,7 @@ void DecompressBlock(ivec3 coord) {
const uint extra_cem = StreamBits(extra_cem_bits); const uint extra_cem = StreamBits(extra_cem_bits);
const uint cem = ((extra_cem << 6) | base_cem) >> 2; const uint cem = ((extra_cem << 6) | base_cem) >> 2;
const uint c0 = cem & ((1 << num_partitions) - 1); const uint c0 = cem & ((1 << num_partitions) - 1);
const uint c1 = (cem >> num_partitions) & ((1 << (num_partitions * 2)) - 1); const uint c1 = (cem >> num_partitions) & ((1 << (num_partitions << 1)) - 1);
const uvec4 c = (uvec4(c0) >> uvec4(0, 1, 2, 3)) & 1; const uvec4 c = (uvec4(c0) >> uvec4(0, 1, 2, 3)) & 1;
const uvec4 m = (uvec4(c1) >> uvec4(0, 2, 4, 6)) & 3; const uvec4 m = (uvec4(c1) >> uvec4(0, 2, 4, 6)) & 3;
color_endpoint_mode = (((uvec4(base_mode) - (1 - c)) << 2) | m) & cem_mask; color_endpoint_mode = (((uvec4(base_mode) - (1 - c)) << 2) | m) & cem_mask;
@ -951,36 +951,27 @@ void DecompressBlock(ivec3 coord) {
color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx;
const uint clear_byte_start = (weight_bits >> 3) + 1; const uint clear_byte_start = (weight_bits >> 3) + 1;
const uint byte_insert = ExtractBits(color_endpoint_data, (clear_byte_start - 1) * 8, 8) & uint(((1 << (weight_bits & 7)) - 1)); const uint byte_insert = ExtractBits(color_endpoint_data, (clear_byte_start - 1) << 3, 8) & uint(((1 << (weight_bits & 7)) - 1));
const uint vec_index = (clear_byte_start - 1) >> 2; const uint vec_index = (clear_byte_start - 1) >> 2;
color_endpoint_data[vec_index] = bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) & 3) * 8, 8); color_endpoint_data[vec_index] = bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) & 3) << 3, 8);
for (uint i = clear_byte_start; i < 16; ++i) { for (uint i = clear_byte_start; i < 16; ++i)
const uint idx = i >> 2; color_endpoint_data[i >> 2] = bitfieldInsert(color_endpoint_data[i >> 2], 0, int(i & 3) << 3, 8);
color_endpoint_data[idx] = bitfieldInsert(color_endpoint_data[idx], 0, int(i & 3) * 8, 8);
}
// Re-init vector variables for next decode phase // Re-init vector variables for next decode phase
result_index = 0; result_index = 0;
color_bitsread = 0; color_bitsread = 0;
// The limit for the Unquantize phase, avoids decoding more data than needed. // The limit for the Unquantize phase, avoids decoding more data than needed.
result_vector_max_index = size_params.x * size_params.y; result_vector_max_index = (size_params.x * size_params.y) << uint(dual_plane);
if (dual_plane) {
result_vector_max_index *= 2;
}
DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane)); DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane));
UnquantizeTexelWeights(size_params, dual_plane); UnquantizeTexelWeights(size_params, dual_plane);
for (uint j = 0; j < block_dims.y; j++) { for (uint j = 0; j < block_dims.y; j++) {
for (uint i = 0; i < block_dims.x; i++) { for (uint i = 0; i < block_dims.x; i++) {
uint local_partition = 0; const uint local_partition = Select2DPartition(partition_index, uvec2(i, j), num_partitions) & (0 - uint(num_partitions > 1));
if (num_partitions > 1) {
local_partition = Select2DPartition(partition_index, uvec2(i, j), num_partitions);
}
const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane); const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane);
const vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6); const vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + 32) >> 6);
const vec4 p = (Cf / 65535.0f); const vec4 p = (Cf / 65535.0f);
imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar);
} }