[video_core] Implement GPU-accelerated texture unswizzling and optimize sparse texture handling (#3246)
- [Added] a new compute shader to handle block-linear unswizzling on the GPU, reducing CPU overhead during texture uploads
- [Implemented] BlockLinearUnswizzle3DPass to take advantage of the new compute shader (not implemented for OpenGL)
- [Implemented] a texture streaming and queue system for large sparse textures to prevent hitches
- [Implemented] an aggressive garbage collection system to evict large sparse textures and save memory (unused)
- [Added] user settings to adjust the streaming unswizzle system for low-end machines
- [Improved] the ASTC GPU decoding system slightly

Co-authored-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Co-authored-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: DraVee <dravee@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3246
Reviewed-by: Maufeat <sahyno1996@gmail.com>
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Reviewed-by: DraVee <dravee@eden-emu.dev>
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: Forrest Keller <forrestmarkx@outlook.com>
Co-committed-by: Forrest Keller <forrestmarkx@outlook.com>
This commit is contained in:
parent f544004b5d
commit ecd01e13fd

20 changed files with 1076 additions and 83 deletions
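For context on what the new compute path computes: Maxwell/Tegra textures are stored block-linear, tiled into 512-byte GOBs (64 bytes wide by 8 rows) that are grouped into larger blocks along Y and Z. Below is a minimal CPU-side sketch (not part of this commit; parameter names are illustrative) of the per-block address computation the shader's main() performs, with the push-constant masks derived from the log2 block dimensions:

#include <cstdint>

// Hedged sketch of block-linear (GOB) addressing; constants follow the
// shader: GOB_SIZE_SHIFT = 9 (512-byte GOBs), GOB rows are 8 texels tall.
uint64_t BlockLinearOffset(uint32_t x_bytes, uint32_t y, uint32_t z,
                           uint32_t block_height,      // log2 GOBs per block in Y
                           uint32_t block_depth,       // log2 GOBs per block in Z
                           uint32_t slice_size,        // bytes per Z slice of blocks
                           uint32_t block_size,        // bytes per row of blocks
                           uint32_t x_shift,           // log2 byte stride between GOB columns
                           uint32_t swizzle_in_gob) {  // table lookup within the GOB
    const uint32_t gob_size_shift = 9;
    const uint32_t block_y = y >> 3; // which GOB row
    uint64_t offset = 0;
    offset += (z >> block_depth) * slice_size;
    offset += uint64_t(z & ((1u << block_depth) - 1)) << (gob_size_shift + block_height);
    offset += (block_y >> block_height) * block_size;
    offset += uint64_t(block_y & ((1u << block_height) - 1)) << gob_size_shift;
    offset += uint64_t(x_bytes >> 6) << x_shift;
    offset += swizzle_in_gob;
    return offset;
}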
@@ -18,6 +18,7 @@ set(SHADER_FILES
     blit_color_float.frag
     block_linear_unswizzle_2d.comp
     block_linear_unswizzle_3d.comp
+    block_linear_unswizzle_3d_bcn.comp
     convert_abgr8_srgb_to_d24s8.frag
     convert_abgr8_to_d24s8.frag
     convert_abgr8_to_d32f.frag
@@ -727,70 +727,35 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, ui
 }

 uint UnquantizeTexelWeight(EncodingData val) {
-    const uint encoding = Encoding(val);
-    const uint bitlen = NumBits(val);
-    const uint bitval = BitValue(val);
-    const uint A = ReplicateBitTo7((bitval & 1));
-    uint B = 0, C = 0, D = 0;
-    uint result = 0;
-    const uint bitlen_0_results[5] = {0, 16, 32, 48, 64};
-    switch (encoding) {
-    case JUST_BITS:
-        return FastReplicateTo6(bitval, bitlen);
-    case TRIT: {
-        D = QuintTritValue(val);
-        switch (bitlen) {
-        case 0:
-            return bitlen_0_results[D * 2];
-        case 1: {
-            C = 50;
-            break;
-        }
-        case 2: {
-            C = 23;
-            const uint b = (bitval >> 1) & 1;
-            B = (b << 6) | (b << 2) | b;
-            break;
-        }
-        case 3: {
-            C = 11;
-            const uint cb = (bitval >> 1) & 3;
-            B = (cb << 5) | cb;
-            break;
-        }
-        default:
-            break;
-        }
-        break;
-    }
-    case QUINT: {
-        D = QuintTritValue(val);
-        switch (bitlen) {
-        case 0:
-            return bitlen_0_results[D];
-        case 1: {
-            C = 28;
-            break;
-        }
-        case 2: {
-            C = 13;
-            const uint b = (bitval >> 1) & 1;
-            B = (b << 6) | (b << 1);
-            break;
-        }
-        }
-        break;
-    }
-    }
-    if (encoding != JUST_BITS && bitlen > 0) {
-        result = D * C + B;
-        result ^= A;
-        result = (A & 0x20) | (result >> 2);
-    }
-    if (result > 32) {
-        result += 1;
-    }
-    return result;
+    uint encoding = Encoding(val), bitlen = NumBits(val), bitval = BitValue(val);
+    if (encoding == JUST_BITS) {
+        return (bitlen >= 1 && bitlen <= 5)
+                   ? uint(floor(0.5f + float(bitval) * 64.0f / float((1 << bitlen) - 1)))
+                   : FastReplicateTo6(bitval, bitlen);
+    } else if (encoding == TRIT || encoding == QUINT) {
+        uint B = 0, C = 0, D = 0;
+        uint b_mask = (0x3100 >> (bitlen * 4)) & 0xf;
+        uint b = (bitval >> 1) & b_mask;
+        D = QuintTritValue(val);
+        if (encoding == TRIT) {
+            switch (bitlen) {
+            case 0: return D * 32; // 0, 32, 64
+            case 1: C = 50; break;
+            case 2: C = 23; B = (b << 6) | (b << 2) | b; break;
+            case 3: C = 11; B = (b << 5) | b; break;
+            }
+        } else if (encoding == QUINT) {
+            switch (bitlen) {
+            case 0: return D * 16; // 0, 16, 32, 48, 64
+            case 1: C = 28; break;
+            case 2: C = 13; B = (b << 6) | (b << 1); break;
+            }
+        }
+        uint A = ReplicateBitTo7(bitval & 1);
+        uint res = (A & 0x20) | (((D * C + B) ^ A) >> 2);
+        return res + (res > 32 ? 1 : 0);
+    }
+    return 0;
 }

 void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
@@ -1159,10 +1124,11 @@ void DecompressBlock(ivec3 coord) {
 }

 uint SwizzleOffset(uvec2 pos) {
-    const uint x = pos.x;
-    const uint y = pos.y;
-    return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
-           ((x % 32) / 16) * 32 + (y % 2) * 16 + (x % 16);
+    return ((pos.x & 32u) << 3u) |
+           ((pos.y & 6u) << 5u) |
+           ((pos.x & 16u) << 1u) |
+           ((pos.y & 1u) << 4u) |
+           (pos.x & 15u);
 }

 void main() {
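The rewritten SwizzleOffset drops the divide/modulo arithmetic for pure bit operations. A standalone check (not part of the commit) confirming both forms agree for every texel position inside a 64x8 GOB:

#include <cassert>
#include <cstdint>

// Original divide/modulo form, as removed above.
static uint32_t SwizzleOffsetDiv(uint32_t x, uint32_t y) {
    return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
           ((x % 32) / 16) * 32 + (y % 2) * 16 + (x % 16);
}

// New bit-twiddled form, as added above.
static uint32_t SwizzleOffsetBits(uint32_t x, uint32_t y) {
    return ((x & 32) << 3) | ((y & 6) << 5) | ((x & 16) << 1) |
           ((y & 1) << 4) | (x & 15);
}

int main() {
    // Exhaustively compare both forms over one GOB (64 x 8 texels).
    for (uint32_t y = 0; y < 8; ++y) {
        for (uint32_t x = 0; x < 64; ++x) {
            assert(SwizzleOffsetDiv(x, y) == SwizzleOffsetBits(x, y));
        }
    }
}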
src/video_core/host_shaders/block_linear_unswizzle_3d_bcn.comp (new file, 160 lines)
@@ -0,0 +1,160 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#version 430

#ifdef VULKAN
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
#define HAS_EXTENDED_TYPES 1
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_OUTPUT_BUFFER 2
#else
#extension GL_NV_gpu_shader5 : enable
#ifdef GL_NV_gpu_shader5
#define HAS_EXTENDED_TYPES 1
#else
#define HAS_EXTENDED_TYPES 0
#endif
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define UNIFORM(n) layout(location = n) uniform
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_OUTPUT_BUFFER 0
#endif

// --- Push Constants / Uniforms ---
#ifdef VULKAN
layout(push_constant) uniform PushConstants {
    uvec3 blocks_dim;           // Offset 0
    uint bytes_per_block_log2;  // Offset 12

    uvec3 origin;               // Offset 16
    uint slice_size;            // Offset 28

    uint block_size;            // Offset 32
    uint x_shift;               // Offset 36
    uint block_height;          // Offset 40
    uint block_height_mask;     // Offset 44

    uint block_depth;           // Offset 48
    uint block_depth_mask;      // Offset 52
    int _pad;                   // Offset 56

    ivec3 destination;          // Offset 60
} pc;
#else
BEGIN_PUSH_CONSTANTS
UNIFORM(0) uvec3 origin;
UNIFORM(1) ivec3 destination;
UNIFORM(2) uint bytes_per_block_log2;
UNIFORM(3) uint slice_size;
UNIFORM(4) uint block_size;
UNIFORM(5) uint x_shift;
UNIFORM(6) uint block_height;
UNIFORM(7) uint block_height_mask;
UNIFORM(8) uint block_depth;
UNIFORM(9) uint block_depth_mask;
UNIFORM(10) uvec3 blocks_dim;
END_PUSH_CONSTANTS
#define pc // Map pc prefix to nothing for OpenGL compatibility
#endif

// --- Buffers ---
layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
    uint swizzle_table[];
};

#if HAS_EXTENDED_TYPES
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
#endif
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };

layout(binding = BINDING_OUTPUT_BUFFER, std430) writeonly buffer OutputBuffer {
    uint out_u32[];
};

// --- Constants ---
layout(local_size_x = 8, local_size_y = 8, local_size_z = 4) in;

const uint GOB_SIZE_X = 64;
const uint GOB_SIZE_Y = 8;
const uint GOB_SIZE_Z = 1;
const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;

const uint GOB_SIZE_X_SHIFT = 6;
const uint GOB_SIZE_Y_SHIFT = 3;
const uint GOB_SIZE_Z_SHIFT = 0;
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1u, GOB_SIZE_Y - 1u);

// --- Helpers ---
uint SwizzleOffset(uvec2 pos) {
    pos &= SWIZZLE_MASK;
    return swizzle_table[pos.y * 64u + pos.x];
}

uvec4 ReadTexel(uint offset) {
    uint bpl2 = pc.bytes_per_block_log2;
    switch (bpl2) {
#if HAS_EXTENDED_TYPES
    case 0u: return uvec4(u8data[offset], 0u, 0u, 0u);
    case 1u: return uvec4(u16data[offset / 2u], 0u, 0u, 0u);
#else
    case 0u: return uvec4(bitfieldExtract(u32data[offset / 4u], int((offset * 8u) & 24u), 8), 0u, 0u, 0u);
    case 1u: return uvec4(bitfieldExtract(u32data[offset / 4u], int((offset * 8u) & 16u), 16), 0u, 0u, 0u);
#endif
    case 2u: return uvec4(u32data[offset / 4u], 0u, 0u, 0u);
    case 3u: return uvec4(u64data[offset / 8u], 0u, 0u);
    case 4u: return u128data[offset / 16u];
    }
    return uvec4(0u);
}

void main() {
    uvec3 block_coord = gl_GlobalInvocationID;
    if (any(greaterThanEqual(block_coord, pc.blocks_dim))) {
        return;
    }

    uint bytes_per_block = 1u << pc.bytes_per_block_log2;
    // Origin is in pixels, divide by 4 for block-space (e.g. BCn formats)
    uvec3 pos;
    pos.x = (block_coord.x + (pc.origin.x >> 2u)) * bytes_per_block;
    pos.y = block_coord.y + (pc.origin.y >> 2u);
    pos.z = block_coord.z + pc.origin.z;

    uint swizzle = SwizzleOffset(pos.xy);
    uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
    uint offset = 0u;
    // Apply block-linear offsets
    offset += (pos.z >> pc.block_depth) * pc.slice_size;
    offset += (pos.z & pc.block_depth_mask) << (GOB_SIZE_SHIFT + pc.block_height);
    offset += (block_y >> pc.block_height) * pc.block_size;
    offset += (block_y & pc.block_height_mask) << GOB_SIZE_SHIFT;
    offset += (pos.x >> GOB_SIZE_X_SHIFT) << pc.x_shift;
    offset += swizzle;

    uvec4 texel = ReadTexel(offset);

    // Calculate linear output index
    uint block_index = block_coord.x +
                       (block_coord.y * pc.blocks_dim.x) +
                       (block_coord.z * pc.blocks_dim.x * pc.blocks_dim.y);
    uint out_idx = block_index * (bytes_per_block >> 2u);

    out_u32[out_idx] = texel.x;
    out_u32[out_idx + 1u] = texel.y;
    if (pc.bytes_per_block_log2 == 4u) {
        out_u32[out_idx + 2u] = texel.z;
        out_u32[out_idx + 3u] = texel.w;
    }
}
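On backends without 8/16-bit storage types, ReadTexel above falls back to bitfieldExtract on packed 32-bit words. The same byte extraction on the CPU, as a hedged reference (illustrative helper, not from the commit):

#include <cstdint>

// Extract one byte from a little-endian packed u32 array; mirrors the
// shader's fallback: shift = (byte_offset * 8) & 24 selects 0/8/16/24.
uint32_t ReadU8(const uint32_t* words, uint32_t byte_offset) {
    const uint32_t word = words[byte_offset / 4];
    const uint32_t shift = (byte_offset * 8) & 24;
    return (word >> shift) & 0xFF;
}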
@@ -556,7 +556,7 @@ void TextureCacheRuntime::Finish() {
     glFinish();
 }

-StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
+StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size, bool deferred) {
     return staging_buffer_pool.RequestUploadBuffer(size);
 }
@@ -651,7 +651,8 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
 }

 void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map,
-                                                std::span<const SwizzleParameters> swizzles) {
+                                                std::span<const SwizzleParameters> swizzles,
+                                                u32 z_start, u32 z_count) {
     switch (image.info.type) {
     case ImageType::e2D:
         if (IsPixelFormatASTC(image.info.format)) {
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -72,7 +75,7 @@ public:

     void Finish();

-    StagingBufferMap UploadStagingBuffer(size_t size);
+    StagingBufferMap UploadStagingBuffer(size_t size, bool deferred = false);

     StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false);
@@ -116,7 +119,8 @@ public:
                    Tegra::Engines::Fermi2D::Operation operation);

     void AccelerateImageUpload(Image& image, const StagingBufferMap& map,
-                               std::span<const VideoCommon::SwizzleParameters> swizzles);
+                               std::span<const VideoCommon::SwizzleParameters> swizzles,
+                               u32 z_start, u32 z_count);

     void InsertUploadMemoryBarrier();
@@ -223,6 +227,8 @@ public:

     bool ScaleDown(bool ignore = false);

+    u64 allocation_tick;
+
 private:
     void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
@@ -24,6 +24,7 @@
 #include "video_core/host_shaders/resolve_conditional_render_comp_spv.h"
 #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
 #include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
+#include "video_core/host_shaders/block_linear_unswizzle_3d_bcn_comp_spv.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -622,7 +623,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
         .pNext = nullptr,
         .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
-        .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
         .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
         .newLayout = VK_IMAGE_LAYOUT_GENERAL,
         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -637,9 +638,292 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
             },
         };
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
+                               VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, image_barrier);
     });
 }

+constexpr u32 BL3D_BINDING_SWIZZLE_TABLE = 0;
+constexpr u32 BL3D_BINDING_INPUT_BUFFER = 1;
+constexpr u32 BL3D_BINDING_OUTPUT_BUFFER = 2;
+
+constexpr std::array<VkDescriptorSetLayoutBinding, 3> BL3D_DESCRIPTOR_SET_BINDINGS{{
+    {
+        .binding = BL3D_BINDING_SWIZZLE_TABLE,
+        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // swizzle_table[]
+        .descriptorCount = 1,
+        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+        .pImmutableSamplers = nullptr,
+    },
+    {
+        .binding = BL3D_BINDING_INPUT_BUFFER,
+        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // block-linear input
+        .descriptorCount = 1,
+        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+        .pImmutableSamplers = nullptr,
+    },
+    {
+        .binding = BL3D_BINDING_OUTPUT_BUFFER,
+        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+        .descriptorCount = 1,
+        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+        .pImmutableSamplers = nullptr,
+    },
+}};
+
+constexpr DescriptorBankInfo BL3D_BANK_INFO{
+    .uniform_buffers = 0,
+    .storage_buffers = 3,
+    .texture_buffers = 0,
+    .image_buffers = 0,
+    .textures = 0,
+    .images = 0,
+    .score = 3,
+};
+
+constexpr std::array<VkDescriptorUpdateTemplateEntry, 3>
+    BL3D_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
+        {
+            .dstBinding = BL3D_BINDING_SWIZZLE_TABLE,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = BL3D_BINDING_SWIZZLE_TABLE * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        },
+        {
+            .dstBinding = BL3D_BINDING_INPUT_BUFFER,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = BL3D_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        },
+        {
+            .dstBinding = BL3D_BINDING_OUTPUT_BUFFER,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = BL3D_BINDING_OUTPUT_BUFFER * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        }
+    }};
+
+struct alignas(16) BlockLinearUnswizzle3DPushConstants {
+    u32 blocks_dim[3];         // Offset 0
+    u32 bytes_per_block_log2;  // Offset 12
+
+    u32 origin[3];             // Offset 16
+    u32 slice_size;            // Offset 28
+
+    u32 block_size;            // Offset 32
+    u32 x_shift;               // Offset 36
+    u32 block_height;          // Offset 40
+    u32 block_height_mask;     // Offset 44
+
+    u32 block_depth;           // Offset 48
+    u32 block_depth_mask;      // Offset 52
+    s32 _pad;                  // Offset 56
+
+    s32 destination[3];        // Offset 60
+    s32 _pad_end;              // Offset 72
+};
+static_assert(sizeof(BlockLinearUnswizzle3DPushConstants) <= 128);
+
+BlockLinearUnswizzle3DPass::BlockLinearUnswizzle3DPass(
+    const Device& device_, Scheduler& scheduler_,
+    DescriptorPool& descriptor_pool_,
+    StagingBufferPool& staging_buffer_pool_,
+    ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
+    : ComputePass(
+          device_, descriptor_pool_,
+          BL3D_DESCRIPTOR_SET_BINDINGS,
+          BL3D_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY,
+          BL3D_BANK_INFO,
+          COMPUTE_PUSH_CONSTANT_RANGE<sizeof(BlockLinearUnswizzle3DPushConstants)>,
+          BLOCK_LINEAR_UNSWIZZLE_3D_BCN_COMP_SPV),
+      scheduler{scheduler_},
+      staging_buffer_pool{staging_buffer_pool_},
+      compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
+
+BlockLinearUnswizzle3DPass::~BlockLinearUnswizzle3DPass() = default;
+
+// God have mercy on my soul
+void BlockLinearUnswizzle3DPass::Unswizzle(
+    Image& image,
+    const StagingBufferRef& swizzled,
+    std::span<const VideoCommon::SwizzleParameters> swizzles,
+    u32 z_start, u32 z_count)
+{
+    using namespace VideoCommon::Accelerated;
+
+    const u32 MAX_BATCH_SLICES = std::min(z_count, image.info.size.depth);
+
+    if (!image.has_compute_unswizzle_buffer) {
+        // Allocate exactly what this batch needs
+        image.AllocateComputeUnswizzleBuffer(MAX_BATCH_SLICES);
+    }
+
+    ASSERT(swizzles.size() == 1);
+    const auto& sw = swizzles[0];
+    const auto params = MakeBlockLinearSwizzle3DParams(sw, image.info);
+
+    const u32 blocks_x = (image.info.size.width + 3) / 4;
+    const u32 blocks_y = (image.info.size.height + 3) / 4;
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+    for (u32 z_offset = 0; z_offset < z_count; z_offset += MAX_BATCH_SLICES) {
+        const u32 current_chunk_slices = std::min(MAX_BATCH_SLICES, z_count - z_offset);
+        const u32 current_z_start = z_start + z_offset;
+
+        UnswizzleChunk(image, swizzled, sw, params, blocks_x, blocks_y,
+                       current_z_start, current_chunk_slices);
+    }
+}
+
+void BlockLinearUnswizzle3DPass::UnswizzleChunk(
+    Image& image,
+    const StagingBufferRef& swizzled,
+    const VideoCommon::SwizzleParameters& sw,
+    const BlockLinearSwizzle3DParams& params,
+    u32 blocks_x, u32 blocks_y,
+    u32 z_start, u32 z_count)
+{
+    BlockLinearUnswizzle3DPushConstants pc{};
+    pc.origin[0] = params.origin[0];
+    pc.origin[1] = params.origin[1];
+    pc.origin[2] = z_start; // Current chunk's Z start
+
+    pc.destination[0] = params.destination[0];
+    pc.destination[1] = params.destination[1];
+    pc.destination[2] = 0; // Shader writes to start of output buffer
+
+    pc.bytes_per_block_log2 = params.bytes_per_block_log2;
+    pc.slice_size = params.slice_size;
+    pc.block_size = params.block_size;
+    pc.x_shift = params.x_shift;
+    pc.block_height = params.block_height;
+    pc.block_height_mask = params.block_height_mask;
+    pc.block_depth = params.block_depth;
+    pc.block_depth_mask = params.block_depth_mask;
+
+    pc.blocks_dim[0] = blocks_x;
+    pc.blocks_dim[1] = blocks_y;
+    pc.blocks_dim[2] = z_count; // Only process the count
+
+    compute_pass_descriptor_queue.Acquire();
+    compute_pass_descriptor_queue.AddBuffer(*image.runtime->swizzle_table_buffer, 0,
+                                            image.runtime->swizzle_table_size);
+    compute_pass_descriptor_queue.AddBuffer(swizzled.buffer,
+                                            sw.buffer_offset + swizzled.offset,
+                                            image.guest_size_bytes - sw.buffer_offset);
+    compute_pass_descriptor_queue.AddBuffer(*image.compute_unswizzle_buffer, 0,
+                                            image.compute_unswizzle_buffer_size);
+
+    const void* descriptor_data = compute_pass_descriptor_queue.UpdateData();
+    const VkDescriptorSet set = descriptor_allocator.Commit();
+
+    const u32 gx = Common::DivCeil(blocks_x, 8u);
+    const u32 gy = Common::DivCeil(blocks_y, 8u);
+    const u32 gz = Common::DivCeil(z_count, 4u);
+
+    const u32 bytes_per_block = 1u << pc.bytes_per_block_log2;
+    const VkDeviceSize output_slice_size =
+        static_cast<VkDeviceSize>(blocks_x) * blocks_y * bytes_per_block;
+    const VkDeviceSize barrier_size = output_slice_size * z_count;
+
+    const bool is_first_chunk = (z_start == 0);
+
+    const VkBuffer out_buffer = *image.compute_unswizzle_buffer;
+    const VkImage dst_image = image.Handle();
+    const VkImageAspectFlags aspect = image.AspectMask();
+    const u32 image_width = image.info.size.width;
+    const u32 image_height = image.info.size.height;
+
+    scheduler.Record([this, set, descriptor_data, pc, gx, gy, gz, z_start, z_count,
+                      barrier_size, is_first_chunk, out_buffer, dst_image, aspect,
+                      image_width, image_height](vk::CommandBuffer cmdbuf) {
+        if (dst_image == VK_NULL_HANDLE || out_buffer == VK_NULL_HANDLE) {
+            return;
+        }
+
+        device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
+        cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc);
+        cmdbuf.Dispatch(gx, gy, gz);
+
+        // Single barrier for compute -> transfer (buffer ready, image transition)
+        const VkBufferMemoryBarrier buffer_barrier{
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = out_buffer,
+            .offset = 0,
+            .size = barrier_size,
+        };
+
+        // Image layout transition
+        const VkImageMemoryBarrier pre_barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = is_first_chunk ? VkAccessFlags{} :
+                                 static_cast<VkAccessFlags>(VK_ACCESS_TRANSFER_WRITE_BIT),
+            .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .oldLayout = is_first_chunk ? VK_IMAGE_LAYOUT_UNDEFINED :
+                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+            .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = dst_image,
+            .subresourceRange = {aspect, 0, 1, 0, 1},
+        };
+
+        // Single barrier handles both buffer and image
+        cmdbuf.PipelineBarrier(
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+            VK_PIPELINE_STAGE_TRANSFER_BIT,
+            0,
+            nullptr, buffer_barrier, pre_barrier
+        );
+
+        // Copy chunk to correct Z position in image
+        const VkBufferImageCopy copy{
+            .bufferOffset = 0, // Read from start of staging buffer
+            .bufferRowLength = 0,
+            .bufferImageHeight = 0,
+            .imageSubresource = {aspect, 0, 0, 1},
+            .imageOffset = {0, 0, static_cast<s32>(z_start)}, // Write to correct Z
+            .imageExtent = {image_width, image_height, z_count},
+        };
+        cmdbuf.CopyBufferToImage(out_buffer, dst_image,
+                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
+
+        // Post-copy transition
+        const VkImageMemoryBarrier post_barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = dst_image,
+            .subresourceRange = {aspect, 0, 1, 0, 1},
+        };
+
+        cmdbuf.PipelineBarrier(
+            VK_PIPELINE_STAGE_TRANSFER_BIT,
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+            0,
+            nullptr, nullptr, post_barrier
+        );
+    });
+    scheduler.Finish();
+}
+
 MSAACopyPass::MSAACopyPass(const Device& device_, Scheduler& scheduler_,
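UnswizzleChunk dispatches DivCeil(blocks_x, 8) x DivCeil(blocks_y, 8) x DivCeil(z_count, 4) workgroups to match the shader's local_size of 8x8x4. A small standalone sketch of that arithmetic (not from the commit; Common::DivCeil is assumed to behave as below), with an illustrative example:

#include <cstdint>

// Round-up division, matching what Common::DivCeil is used for in the pass.
constexpr uint32_t DivCeil(uint32_t n, uint32_t d) { return (n + d - 1) / d; }

struct Dispatch { uint32_t x, y, z; };

// One thread per 4x4 BCn block; each workgroup covers 8x8 blocks x 4 slices.
constexpr Dispatch GroupsFor(uint32_t blocks_x, uint32_t blocks_y, uint32_t z_count) {
    return {DivCeil(blocks_x, 8u), DivCeil(blocks_y, 8u), DivCeil(z_count, 4u)};
}

// e.g. a 2048x2048x128 BC1 volume has 512x512 blocks per slice:
// GroupsFor(512, 512, 128) == {64, 64, 32}.
static_assert(GroupsFor(512, 512, 128).x == 64);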
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -14,6 +17,7 @@
 #include "video_core/texture_cache/types.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
+#include "video_core/texture_cache/accelerated_swizzle.h"

 namespace VideoCommon {
 struct SwizzleParameters;
@@ -21,6 +25,8 @@ struct SwizzleParameters;

 namespace Vulkan {

+using VideoCommon::Accelerated::BlockLinearSwizzle3DParams;
+
 class Device;
 class StagingBufferPool;
 class Scheduler;
@@ -131,6 +137,34 @@ private:
     MemoryAllocator& memory_allocator;
 };

+class BlockLinearUnswizzle3DPass final : public ComputePass {
+public:
+    explicit BlockLinearUnswizzle3DPass(const Device& device_, Scheduler& scheduler_,
+                                        DescriptorPool& descriptor_pool_,
+                                        StagingBufferPool& staging_buffer_pool_,
+                                        ComputePassDescriptorQueue& compute_pass_descriptor_queue_);
+    ~BlockLinearUnswizzle3DPass();
+
+    void Unswizzle(Image& image,
+                   const StagingBufferRef& swizzled,
+                   std::span<const VideoCommon::SwizzleParameters> swizzles,
+                   u32 z_start, u32 z_count);
+
+    void UnswizzleChunk(
+        Image& image,
+        const StagingBufferRef& swizzled,
+        const VideoCommon::SwizzleParameters& sw,
+        const BlockLinearSwizzle3DParams& params,
+        u32 blocks_x, u32 blocks_y,
+        u32 z_start, u32 z_count);
+
+private:
+    Scheduler& scheduler;
+    StagingBufferPool& staging_buffer_pool;
+    ComputePassDescriptorQueue& compute_pass_descriptor_queue;
+};
+
 class MSAACopyPass final : public ComputePass {
 public:
     explicit MSAACopyPass(const Device& device_, Scheduler& scheduler_,
@@ -43,6 +43,16 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
     : device{device_}, state_tracker{state_tracker_},
       master_semaphore{std::make_unique<MasterSemaphore>(device)},
       command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
+
+    /*// PRE-OPTIMIZATION: Warm up the pool to prevent mid-frame spikes
+    {
+        std::scoped_lock rl{reserve_mutex};
+        chunk_reserve.reserve(2048); // Prevent vector resizing
+        for (int i = 0; i < 1024; ++i) {
+            chunk_reserve.push_back(std::make_unique<CommandChunk>());
+        }
+    }*/
+
     AcquireNewChunk();
     AllocateWorkerCommandBuffer();
     worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
@@ -24,12 +24,14 @@
 #include "video_core/renderer_vulkan/vk_render_pass_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/surface.h"
 #include "video_core/texture_cache/formatter.h"
 #include "video_core/texture_cache/samples_helper.h"
 #include "video_core/texture_cache/util.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
+#include "video_core/textures/decoders.h"

 namespace Vulkan {
@@ -878,14 +880,51 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
             }
         }
     }
+
+    bl3d_unswizzle_pass.emplace(device, scheduler, descriptor_pool,
+                                staging_buffer_pool, compute_pass_descriptor_queue);
+
+    // --- Create swizzle table buffer ---
+    {
+        auto table = Tegra::Texture::MakeSwizzleTable();
+
+        swizzle_table_size = static_cast<VkDeviceSize>(table.size() * sizeof(table[0]));
+
+        auto staging = staging_buffer_pool.Request(swizzle_table_size, MemoryUsage::Upload);
+        std::memcpy(staging.mapped_span.data(), table.data(), static_cast<size_t>(swizzle_table_size));
+
+        VkBufferCreateInfo ci{
+            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+            .size = swizzle_table_size,
+            .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                     VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+                     VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        };
+        swizzle_table_buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::DeviceLocal);
+
+        scheduler.RequestOutsideRenderPassOperationContext();
+        scheduler.Record([staging_buf = staging.buffer,
+                          dst_buf = *swizzle_table_buffer,
+                          size = swizzle_table_size,
+                          src_off = staging.offset](vk::CommandBuffer cmdbuf) {
+            const VkBufferCopy region{
+                .srcOffset = src_off,
+                .dstOffset = 0,
+                .size = size,
+            };
+            cmdbuf.CopyBuffer(staging_buf, dst_buf, region);
+        });
+    }
 }

 void TextureCacheRuntime::Finish() {
     scheduler.Finish();
 }

-StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
-    return staging_buffer_pool.Request(size, MemoryUsage::Upload);
+StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size, bool deferred) {
+    return staging_buffer_pool.Request(size, MemoryUsage::Upload, deferred);
 }

 StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
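The runtime uploads the 64x8 GOB swizzle table once at startup. Assuming Tegra::Texture::MakeSwizzleTable() tabulates the classic divide/modulo formula (a hedged reconstruction; the actual helper lives in the decoders header included above), an equivalent CPU-side builder would be:

#include <array>
#include <cstdint>

// Hedged sketch: one entry per texel position in a GOB (8 rows x 64 bytes),
// giving the byte offset of that texel within the 512-byte GOB.
constexpr std::array<std::array<uint32_t, 64>, 8> MakeSwizzleTable() {
    std::array<std::array<uint32_t, 64>, 8> table{};
    for (uint32_t y = 0; y < 8; ++y) {
        for (uint32_t x = 0; x < 64; ++x) {
            table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
                          ((x % 32) / 16) * 32 + (y % 2) * 16 + (x % 16);
        }
    }
    return table;
}

The shader's SwizzleOffset then reduces to a single indexed load, swizzle_table[pos.y * 64 + pos.x], instead of recomputing the formula per invocation.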
@@ -1581,6 +1620,46 @@ Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBas

 Image::~Image() = default;

+void Image::AllocateComputeUnswizzleBuffer(u32 max_slices) {
+    if (has_compute_unswizzle_buffer)
+        return;
+
+    using VideoCore::Surface::BytesPerBlock;
+
+    const u32 block_bytes = BytesPerBlock(info.format); // 8 for BC1, 16 for BC6H
+    const u32 block_width = 4;
+    const u32 block_height = 4;
+
+    // BCn is 4x4x1 blocks
+    const u32 blocks_x = (info.size.width + block_width - 1) / block_width;
+    const u32 blocks_y = (info.size.height + block_height - 1) / block_height;
+    const u32 blocks_z = std::min(max_slices, info.size.depth);
+
+    const u64 block_count =
+        static_cast<u64>(blocks_x) *
+        static_cast<u64>(blocks_y) *
+        static_cast<u64>(blocks_z);
+
+    compute_unswizzle_buffer_size = block_count * block_bytes;
+
+    VkBufferCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = compute_unswizzle_buffer_size,
+        .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                 VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    };
+
+    compute_unswizzle_buffer =
+        runtime->memory_allocator.CreateBuffer(ci, MemoryUsage::DeviceLocal);
+
+    has_compute_unswizzle_buffer = true;
+}
+
 void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
                          std::span<const VideoCommon::BufferImageCopy> copies) {
     // TODO: Move this to another API
@@ -2397,10 +2476,22 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,

 void TextureCacheRuntime::AccelerateImageUpload(
     Image& image, const StagingBufferRef& map,
-    std::span<const VideoCommon::SwizzleParameters> swizzles) {
+    std::span<const VideoCommon::SwizzleParameters> swizzles,
+    u32 z_start, u32 z_count) {
+
     if (IsPixelFormatASTC(image.info.format)) {
         return astc_decoder_pass->Assemble(image, map, swizzles);
     }
+
+    if (bl3d_unswizzle_pass &&
+        IsPixelFormatBCn(image.info.format) &&
+        image.info.type == ImageType::e3D &&
+        image.info.resources.levels == 1 &&
+        image.info.resources.layers == 1) {
+
+        return bl3d_unswizzle_pass->Unswizzle(image, map, swizzles, z_start, z_count);
+    }
+
     ASSERT(false);
 }
@@ -51,7 +51,7 @@ public:

     void Finish();

-    StagingBufferRef UploadStagingBuffer(size_t size);
+    StagingBufferRef UploadStagingBuffer(size_t size, bool deferred = false);

     StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
@@ -91,7 +91,8 @@ public:
     }

     void AccelerateImageUpload(Image&, const StagingBufferRef&,
-                               std::span<const VideoCommon::SwizzleParameters>);
+                               std::span<const VideoCommon::SwizzleParameters>,
+                               u32 z_start, u32 z_count);

     void InsertUploadMemoryBarrier() {}
@@ -127,6 +128,11 @@ public:
     BlitImageHelper& blit_image_helper;
     RenderPassCache& render_pass_cache;
     std::optional<ASTCDecoderPass> astc_decoder_pass;
+
+    std::optional<BlockLinearUnswizzle3DPass> bl3d_unswizzle_pass;
+    vk::Buffer swizzle_table_buffer;
+    VkDeviceSize swizzle_table_size = 0;
+
     std::unique_ptr<MSAACopyPass> msaa_copy_pass;
     const Settings::ResolutionScalingInfo& resolution;
     std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;
@@ -164,6 +170,8 @@ public:
     void DownloadMemory(const StagingBufferRef& map,
                         std::span<const VideoCommon::BufferImageCopy> copies);

+    void AllocateComputeUnswizzleImage();
+
     [[nodiscard]] VkImage Handle() const noexcept {
         return *(this->*current_image);
     }
@@ -189,6 +197,10 @@ public:

     bool ScaleDown(bool ignore = false);

+    u64 allocation_tick;
+
+    friend class BlockLinearUnswizzle3DPass;
+
 private:
     bool BlitScaleHelper(bool scale_up);
@@ -200,6 +212,12 @@ private:
     vk::Image original_image;
     vk::Image scaled_image;

+    vk::Buffer compute_unswizzle_buffer;
+    VkDeviceSize compute_unswizzle_buffer_size = 0;
+    bool has_compute_unswizzle_buffer = false;
+
+    void AllocateComputeUnswizzleBuffer(u32 max_slices);
+
     // Use a pointer to field because it is relative, so that the object can be
     // moved without breaking the reference.
     vk::Image Image::*current_image{};
@@ -8,6 +8,7 @@

 #include <limits>
 #include <optional>
+#include <bit>
 #include <unordered_set>
 #include <boost/container/small_vector.hpp>
@@ -22,6 +23,7 @@
 #include "video_core/texture_cache/samples_helper.h"
 #include "video_core/texture_cache/texture_cache_base.h"
 #include "video_core/texture_cache/util.h"
+#include "video_core/textures/decoders.h"

 namespace VideoCommon {
@@ -68,10 +70,41 @@ TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag
             (std::max)((std::min)(device_local_memory - min_vacancy_critical, min_spacing_critical),
                        DEFAULT_CRITICAL_MEMORY));
         minimum_memory = static_cast<u64>((device_local_memory - mem_threshold) / 2);
+
+        lowmemorydevice = false;
     } else {
         expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
         critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
         minimum_memory = 0;
+
+        lowmemorydevice = true;
     }
+
+    switch (Settings::values.gpu_unzwizzle_texture_size.GetValue()) {
+    case Settings::GpuUnswizzleSize::VerySmall: gpu_unswizzle_maxsize = 16_MiB; break;
+    case Settings::GpuUnswizzleSize::Small: gpu_unswizzle_maxsize = 32_MiB; break;
+    case Settings::GpuUnswizzleSize::Normal: gpu_unswizzle_maxsize = 128_MiB; break;
+    case Settings::GpuUnswizzleSize::Large: gpu_unswizzle_maxsize = 256_MiB; break;
+    case Settings::GpuUnswizzleSize::VeryLarge: gpu_unswizzle_maxsize = 512_MiB; break;
+    default: gpu_unswizzle_maxsize = 128_MiB; break;
+    }
+
+    switch (Settings::values.gpu_unzwizzle_stream_size.GetValue()) {
+    case Settings::GpuUnswizzle::VeryLow: swizzle_chunk_size = 4_MiB; break;
+    case Settings::GpuUnswizzle::Low: swizzle_chunk_size = 8_MiB; break;
+    case Settings::GpuUnswizzle::Normal: swizzle_chunk_size = 16_MiB; break;
+    case Settings::GpuUnswizzle::Medium: swizzle_chunk_size = 32_MiB; break;
+    case Settings::GpuUnswizzle::High: swizzle_chunk_size = 64_MiB; break;
+    default: swizzle_chunk_size = 16_MiB;
+    }
+
+    switch (Settings::values.gpu_unzwizzle_chunk_size.GetValue()) {
+    case Settings::GpuUnswizzleChunk::VeryLow: swizzle_slices_per_batch = 32; break;
+    case Settings::GpuUnswizzleChunk::Low: swizzle_slices_per_batch = 64; break;
+    case Settings::GpuUnswizzleChunk::Normal: swizzle_slices_per_batch = 128; break;
+    case Settings::GpuUnswizzleChunk::Medium: swizzle_slices_per_batch = 256; break;
+    case Settings::GpuUnswizzleChunk::High: swizzle_slices_per_batch = 512; break;
+    default: swizzle_slices_per_batch = 128;
+    }
 }
@@ -88,6 +121,7 @@ void TextureCache<P>::RunGarbageCollector() {
         ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
         num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
     };

     const auto Cleanup = [this, &num_iterations, &high_priority_mode,
                           &aggressive_mode](ImageId image_id) {
         if (num_iterations == 0) {
@@ -95,20 +129,36 @@ void TextureCache<P>::RunGarbageCollector() {
         }
         --num_iterations;
         auto& image = slot_images[image_id];
+
+        // Never delete recently allocated sparse textures (within 3 frames)
+        const bool is_recently_allocated = image.allocation_tick >= frame_tick - 3;
+        if (is_recently_allocated && image.info.is_sparse) {
+            return false;
+        }
+
         if (True(image.flags & ImageFlagBits::IsDecoding)) {
             // This image is still being decoded, deleting it will invalidate the slot
             // used by the async decoder thread.
             return false;
         }
-        if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
+
+        // Prioritize large sparse textures for cleanup
+        const bool is_large_sparse = lowmemorydevice &&
+                                     image.info.is_sparse &&
+                                     image.guest_size_bytes >= 256_MiB;
+
+        if (!aggressive_mode && !is_large_sparse &&
+            True(image.flags & ImageFlagBits::CostlyLoad)) {
             return false;
         }
+
         const bool must_download =
             image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
-        if (!high_priority_mode && must_download) {
+        if (!high_priority_mode && !is_large_sparse && must_download) {
             return false;
         }
-        if (must_download) {
+
+        if (must_download && !is_large_sparse) {
            auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
            const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info));
            image.DownloadMemory(map, copies);
@@ -116,11 +166,13 @@ void TextureCache<P>::RunGarbageCollector() {
             SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                          swizzle_data_buffer);
         }
+
         if (True(image.flags & ImageFlagBits::Tracked)) {
             UntrackImage(image, image_id);
         }
         UnregisterImage(image_id);
         DeleteImage(image_id, image.scale_tick > frame_tick + 5);
+
         if (total_used_memory < critical_memory) {
             if (aggressive_mode) {
                 // Sink the aggresiveness.
@@ -136,7 +188,24 @@ void TextureCache<P>::RunGarbageCollector() {
         return false;
     };

-    // Try to remove anything old enough and not high priority.
+    // Aggressively clear massive sparse textures
+    if (total_used_memory >= expected_memory) {
+        lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) {
+            auto& image = slot_images[image_id];
+            // Only target sparse textures that are old enough
+            if (lowmemorydevice &&
+                image.info.is_sparse &&
+                image.guest_size_bytes >= 256_MiB &&
+                image.allocation_tick < frame_tick - 3) {
+                LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)",
+                          image.gpu_addr, image.guest_size_bytes / (1024 * 1024),
+                          frame_tick - image.allocation_tick);
+                return Cleanup(image_id);
+            }
+            return false;
+        });
+    }
+
     Configure(false);
     lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
@@ -160,6 +229,7 @@ void TextureCache<P>::TickFrame() {
     sentenced_framebuffers.Tick();
     sentenced_image_view.Tick();
     TickAsyncDecode();
+    TickAsyncUnswizzle();

     runtime.TickFrame();
     ++frame_tick;
@@ -627,7 +697,6 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
             UntrackImage(image, id);
         }
     }
-
     if (True(image.flags & ImageFlagBits::Remapped)) {
         continue;
     }
@@ -1055,7 +1124,12 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
         // Only upload modified images
         return;
     }
+
     image.flags &= ~ImageFlagBits::CpuModified;
+    if (lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM && MapSizeBytes(image) >= 256_MiB) {
+        return;
+    }
+
     TrackImage(image, image_id);

     if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
@@ -1067,6 +1141,16 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
         QueueAsyncDecode(image, image_id);
         return;
     }
+    if (IsPixelFormatBCn(image.info.format) &&
+        image.info.type == ImageType::e3D &&
+        image.info.resources.levels == 1 &&
+        image.info.resources.layers == 1 &&
+        MapSizeBytes(image) >= gpu_unswizzle_maxsize &&
+        False(image.flags & ImageFlagBits::GpuModified)) {
+
+        QueueAsyncUnswizzle(image, image_id);
+        return;
+    }
     auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
     UploadImageContents(image, staging);
     runtime.InsertUploadMemoryBarrier();
@@ -1082,7 +1166,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
         gpu_memory->ReadBlock(gpu_addr, mapped_span.data(), mapped_span.size_bytes(),
                               VideoCommon::CacheType::NoTextureCache);
         const auto uploads = FullUploadSwizzles(image.info);
-        runtime.AccelerateImageUpload(image, staging, FixSmallVectorADL(uploads));
+        runtime.AccelerateImageUpload(image, staging, FixSmallVectorADL(uploads), 0, 0);
         return;
     }
@@ -1311,6 +1395,20 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
     texture_decode_worker.QueueWork(std::move(func));
 }

+template <class P>
+void TextureCache<P>::QueueAsyncUnswizzle(Image& image, ImageId image_id) {
+    if (True(image.flags & ImageFlagBits::IsDecoding)) {
+        return;
+    }
+
+    image.flags |= ImageFlagBits::IsDecoding;
+
+    unswizzle_queue.push_back({
+        .image_id = image_id,
+        .info = image.info
+    });
+}
+
 template <class P>
 void TextureCache<P>::TickAsyncDecode() {
     bool has_uploads{};
@@ -1336,6 +1434,83 @@ void TextureCache<P>::TickAsyncDecode() {
     }
 }

+template <class P>
+void TextureCache<P>::TickAsyncUnswizzle() {
+    if (unswizzle_queue.empty()) {
+        return;
+    }
+
+    if (current_unswizzle_frame > 0) {
+        current_unswizzle_frame--;
+        return;
+    }
+
+    PendingUnswizzle& task = unswizzle_queue.front();
+    Image& image = slot_images[task.image_id];
+
+    if (!task.initialized) {
+        task.total_size = MapSizeBytes(image);
+        task.staging_buffer = runtime.UploadStagingBuffer(task.total_size, true);
+
+        const auto& info = image.info;
+        const u32 bytes_per_block = BytesPerBlock(info.format);
+        const u32 width_blocks = Common::DivCeil(info.size.width, 4u);
+        const u32 height_blocks = Common::DivCeil(info.size.height, 4u);
+
+        const u32 stride = width_blocks * bytes_per_block;
+        const u32 aligned_height = height_blocks;
+        task.bytes_per_slice = static_cast<size_t>(stride) * aligned_height;
+        task.last_submitted_offset = 0;
+        task.initialized = true;
+    }
+
+    // Read data
+    if (task.current_offset < task.total_size) {
+        const size_t remaining = task.total_size - task.current_offset;
+
+        size_t copy_amount = std::min(swizzle_chunk_size, remaining);
+
+        if (remaining > swizzle_chunk_size) {
+            copy_amount = (copy_amount / task.bytes_per_slice) * task.bytes_per_slice;
+            if (copy_amount == 0) copy_amount = task.bytes_per_slice;
+        }
+
+        gpu_memory->ReadBlock(image.gpu_addr + task.current_offset,
+                              task.staging_buffer.mapped_span.data() + task.current_offset,
+                              copy_amount);
+        task.current_offset += copy_amount;
+    }
+
+    const bool is_final_batch = task.current_offset >= task.total_size;
+    const size_t bytes_ready = task.current_offset - task.last_submitted_offset;
+    const u32 complete_slices = static_cast<u32>(bytes_ready / task.bytes_per_slice);
+
+    if (complete_slices >= swizzle_slices_per_batch || (is_final_batch && complete_slices > 0)) {
+        const u32 z_start = static_cast<u32>(task.last_submitted_offset / task.bytes_per_slice);
+        const u32 slices_to_process = std::min(complete_slices, swizzle_slices_per_batch);
+        const u32 z_count = std::min(slices_to_process, image.info.size.depth - z_start);
+
+        if (z_count > 0) {
+            const auto uploads = FullUploadSwizzles(task.info);
+            runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads), z_start, z_count);
+            task.last_submitted_offset += (static_cast<size_t>(z_count) * task.bytes_per_slice);
+        }
+    }
+
+    // Check if complete
+    const u32 slices_submitted = static_cast<u32>(task.last_submitted_offset / task.bytes_per_slice);
+    const bool all_slices_submitted = slices_submitted >= image.info.size.depth;
+
+    if (is_final_batch && all_slices_submitted) {
+        runtime.FreeDeferredStagingBuffer(task.staging_buffer);
+        image.flags &= ~ImageFlagBits::IsDecoding;
+        unswizzle_queue.pop_front();
+
+        // Wait 4 frames to process the next entry
+        current_unswizzle_frame = 4u;
+    }
+}
+
 template <class P>
 bool TextureCache<P>::ScaleUp(Image& image) {
     const bool has_copy = image.HasScaled();
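TickAsyncUnswizzle streams at most swizzle_chunk_size bytes per frame, rounded down to whole Z slices so a dispatch never receives a partial slice. That rounding logic, extracted as a standalone sketch (hypothetical helper name, not from the commit):

#include <algorithm>
#include <cstddef>

// Bytes to copy this tick: capped by the chunk budget, aligned down to a
// whole number of slices; a slice larger than the budget still advances.
size_t NextCopyAmount(size_t remaining, size_t chunk, size_t bytes_per_slice) {
    size_t amount = std::min(chunk, remaining);
    if (remaining > chunk) {
        amount = (amount / bytes_per_slice) * bytes_per_slice; // align down
        if (amount == 0) {
            amount = bytes_per_slice;
        }
    }
    return amount;
}

For example, with a 16 MiB chunk and 3 MiB slices, each tick copies 15 MiB (5 slices); once 128 slices are buffered (swizzle_slices_per_batch at Normal), a batch is dispatched to the compute pass.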
@@ -1374,6 +1549,39 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
         }
     }
     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
+
+    // For large sparse textures, aggressively clean up old allocations at same address
+    if (lowmemorydevice && info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) {
+        const auto alloc_it = image_allocs_table.find(gpu_addr);
+        if (alloc_it != image_allocs_table.end()) {
+            const ImageAllocId alloc_id = alloc_it->second;
+            auto& alloc_images = slot_image_allocs[alloc_id].images;
+
+            // Collect old images at this address that were created more than 2 frames ago
+            boost::container::small_vector<ImageId, 4> to_delete;
+            for (ImageId old_image_id : alloc_images) {
+                Image& old_image = slot_images[old_image_id];
+                if (old_image.info.is_sparse &&
+                    old_image.gpu_addr == gpu_addr &&
+                    old_image.allocation_tick < frame_tick - 2) { // Try not to delete fresh textures
+                    to_delete.push_back(old_image_id);
+                }
+            }
+
+            // Delete old images immediately
+            for (ImageId old_id : to_delete) {
+                Image& old_image = slot_images[old_id];
+                LOG_DEBUG(HW_GPU, "Immediately deleting old sparse texture at 0x{:X} ({} MiB)",
+                          gpu_addr, old_image.guest_size_bytes / (1024 * 1024));
+                if (True(old_image.flags & ImageFlagBits::Tracked)) {
+                    UntrackImage(old_image, old_id);
+                }
+                UnregisterImage(old_id);
+                DeleteImage(old_id, true);
+            }
+        }
+    }
+
     const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
     const Image& image = slot_images[image_id];
     // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
@@ -1389,6 +1597,27 @@ template <class P>
 ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) {
     ImageInfo new_info = info;
     const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
+
+    // Proactive cleanup for large sparse texture allocations
+    if (lowmemorydevice && new_info.is_sparse && size_bytes >= 256_MiB) {
+        const u64 estimated_alloc_size = size_bytes;
+
+        if (total_used_memory + estimated_alloc_size >= critical_memory) {
+            LOG_DEBUG(HW_GPU, "Large sparse texture allocation ({} MiB) - running aggressive GC. "
+                      "Current memory: {} MiB, Critical: {} MiB",
+                      size_bytes / (1024 * 1024),
+                      total_used_memory / (1024 * 1024),
+                      critical_memory / (1024 * 1024));
+            RunGarbageCollector();
+
+            // If still over threshold after GC, try one more aggressive pass
+            if (total_used_memory + estimated_alloc_size >= critical_memory) {
+                LOG_DEBUG(HW_GPU, "Still critically low on memory, running second GC pass");
+                RunGarbageCollector();
+            }
+        }
+    }
+
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     join_overlap_ids.clear();
@@ -1485,6 +1714,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];

+    new_image.allocation_tick = frame_tick;
+
     if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes) &&
         new_info.is_sparse) {
         new_image.flags |= ImageFlagBits::Sparse;
@@ -129,6 +129,17 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
     using AsyncBuffer = typename P::AsyncBuffer;
     using BufferType = typename P::BufferType;

+    struct PendingUnswizzle {
+        ImageId image_id;
+        VideoCommon::ImageInfo info;
+        size_t current_offset = 0;
+        size_t total_size = 0;
+        AsyncBuffer staging_buffer;
+        size_t last_submitted_offset = 0;
+        size_t bytes_per_slice;
+        bool initialized = false;
+    };
+
     struct BlitImages {
         ImageId dst_id;
         ImageId src_id;
@@ -433,6 +444,9 @@ private:
     void TrimInactiveSamplers(size_t budget);
     std::optional<size_t> QuerySamplerBudget() const;

+    void QueueAsyncUnswizzle(Image& image, ImageId image_id);
+    void TickAsyncUnswizzle();
+
     Runtime& runtime;

     Tegra::MaxwellDeviceMemoryManager& device_memory;
@@ -453,6 +467,10 @@ private:
     u64 minimum_memory;
     u64 expected_memory;
     u64 critical_memory;
+    bool lowmemorydevice = false;
+    size_t gpu_unswizzle_maxsize = 0;
+    size_t swizzle_chunk_size = 0;
+    u32 swizzle_slices_per_batch = 0;

     struct BufferDownload {
         GPUVAddr address;
@@ -508,6 +526,9 @@ private:
     Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
     std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;

+    std::deque<PendingUnswizzle> unswizzle_queue;
+    u8 current_unswizzle_frame;
+
     // Join caching
     boost::container::small_vector<ImageId, 4> join_overlap_ids;
     std::unordered_set<ImageId> join_overlaps_found;