[common, core] remove unneeded memory indirection overhead at startup (#3306)

for core stuff:
just remove unique ptrs that don't need any pointer stability at all (after all, it's an allocation within an allocation, so yeah)

for fibers:
Main reasoning behind this is because virtualBuffer<> is stupidly fucking expensive and it also clutters my fstat view
ALSO mmap is a syscall, syscalls are bad for performance or whatever
ALSO std::vector<> is better suited for handling this kind of "fixed size thing where it's like big but not THAT big" (512 KiB isn't going to kill your memory usage for each fiber...)

for core.cpp stuff
- inlines stuff into std::optional<> as opposed to std::unique_ptr<> (because, y'know, we are making the Impl from a unique_ptr; allocating within an allocation is unnecessary)
- reorganizes the structures a bit so padding doesn't screw us up (it's not perfect, but eh, it saves a measly 44 bytes)
- removes unused/dead code
- uses std::vector<> instead of std::deque<>

no perf impact expected, maybe some initialisation boost but very minimal impact nonetheless
lto gets rid of most calls anyways - the heavy issue is with shared_ptr and the cache coherency from the atomics... but i clumped them together because well, they kinda do not suffer from cache coherency - hopefully not a mistake

this balloons the size of Impl to about 1.67 MB - which is fine because we throw it in the stack anyways

REST OF INTERFACES: most of them ballooned in size as well, but overhead is ok since it's an allocation within an alloc, no stack is used (when it comes to storing these i mean)

Signed-off-by: lizzie lizzie@eden-emu.dev
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3306
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-01-16 23:39:16 +01:00 committed by crueter
parent 5768600c8b
commit 83a28dc251
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
40 changed files with 2602 additions and 2963 deletions

View file

@ -376,7 +376,6 @@ void RasterizerVulkan::DrawTexture() {
}
void RasterizerVulkan::Clear(u32 layer_count) {
FlushWork();
gpu_memory->FlushCaching();
@ -396,9 +395,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
scheduler.RequestRenderpass(framebuffer);
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable);
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
@ -443,14 +440,14 @@ void RasterizerVulkan::Clear(u32 layer_count) {
offset = 0;
return;
}
if (offset >= static_cast<s32>(limit)) {
offset = static_cast<s32>(limit);
if (offset >= s32(limit)) {
offset = s32(limit);
extent = 0;
return;
}
const u64 end_coord = static_cast<u64>(offset) + extent;
const u64 end_coord = u64(offset) + extent;
if (end_coord > limit) {
extent = limit - static_cast<u32>(offset);
extent = limit - u32(offset);
}
};
@ -464,30 +461,22 @@ void RasterizerVulkan::Clear(u32 layer_count) {
const u32 color_attachment = regs.clear_surface.RT;
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
const auto format =
VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format);
const auto format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format);
bool is_integer = IsPixelFormatInteger(format);
bool is_signed = IsPixelFormatSignedInteger(format);
size_t int_size = PixelComponentSizeBitsInteger(format);
VkClearValue clear_value{};
if (!is_integer) {
std::memcpy(clear_value.color.float32, regs.clear_color.data(),
regs.clear_color.size() * sizeof(f32));
std::memcpy(clear_value.color.float32, regs.clear_color.data(), regs.clear_color.size() * sizeof(f32));
} else if (!is_signed) {
for (size_t i = 0; i < 4; i++) {
clear_value.color.uint32[i] = static_cast<u32>(
static_cast<f32>(static_cast<u64>(int_size) << 1U) * regs.clear_color[i]);
}
for (size_t i = 0; i < 4; i++)
clear_value.color.uint32[i] = u32(f32(u64(int_size) << 1U) * regs.clear_color[i]);
} else {
for (size_t i = 0; i < 4; i++) {
clear_value.color.int32[i] =
static_cast<s32>(static_cast<f32>(static_cast<s64>(int_size - 1) << 1) *
(regs.clear_color[i] - 0.5f));
}
for (size_t i = 0; i < 4; i++)
clear_value.color.int32[i] = s32(f32(s64(int_size - 1) << 1) * (regs.clear_color[i] - 0.5f));
}
if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B &&
regs.clear_surface.A) {
if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B && regs.clear_surface.A) {
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
const VkClearAttachment attachment{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
@ -497,14 +486,11 @@ void RasterizerVulkan::Clear(u32 layer_count) {
cmdbuf.ClearAttachments(attachment, clear_rect);
});
} else {
u8 color_mask = static_cast<u8>(regs.clear_surface.R | regs.clear_surface.G << 1 |
regs.clear_surface.B << 2 | regs.clear_surface.A << 3);
u8 color_mask = u8(regs.clear_surface.R | regs.clear_surface.G << 1 | regs.clear_surface.B << 2 | regs.clear_surface.A << 3);
Region2D dst_region = {
Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
Offset2D{.x = clear_rect.rect.offset.x +
static_cast<s32>(clear_rect.rect.extent.width),
.y = clear_rect.rect.offset.y +
static_cast<s32>(clear_rect.rect.extent.height)}};
Offset2D{.x = clear_rect.rect.offset.x + s32(clear_rect.rect.extent.width),
.y = clear_rect.rect.offset.y + s32(clear_rect.rect.extent.height)}};
blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region);
}
}
@ -527,11 +513,10 @@ void RasterizerVulkan::Clear(u32 layer_count) {
regs.stencil_front_mask != 0) {
Region2D dst_region = {
Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width),
.y = clear_rect.rect.offset.y +
static_cast<s32>(clear_rect.rect.extent.height)}};
Offset2D{.x = clear_rect.rect.offset.x + s32(clear_rect.rect.extent.width),
.y = clear_rect.rect.offset.y + s32(clear_rect.rect.extent.height)}};
blit_image.ClearDepthStencil(framebuffer, use_depth, regs.clear_depth,
static_cast<u8>(regs.stencil_front_mask), regs.clear_stencil,
u8(regs.stencil_front_mask), regs.clear_stencil,
regs.stencil_front_func_mask, dst_region);
} else {
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,

View file

@ -860,8 +860,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
compute_pass_descriptor_queue, memory_allocator);
}
if (device.IsStorageImageMultisampleSupported()) {
msaa_copy_pass = std::make_unique<MSAACopyPass>(
device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);
msaa_copy_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);
}
if (!device.IsKhrImageFormatListSupported()) {
return;
@ -1675,10 +1674,10 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
// CHANGE: Gate the MSAA path more strictly and only use it for color, when the pass and device
// support are available. Avoid running the MSAA path when prerequisites aren't met,
// preventing validation and runtime issues.
const bool wants_msaa_upload = info.num_samples > 1 &&
(aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0 &&
runtime->CanUploadMSAA() && runtime->msaa_copy_pass != nullptr &&
runtime->device.IsStorageImageMultisampleSupported();
const bool wants_msaa_upload = info.num_samples > 1
&& (aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0
&& runtime->CanUploadMSAA() && runtime->msaa_copy_pass.has_value()
&& runtime->device.IsStorageImageMultisampleSupported();
if (wants_msaa_upload) {
// Create a temporary non-MSAA image to upload the data first
@ -2047,8 +2046,7 @@ bool Image::BlitScaleHelper(bool scale_up) {
const u32 scaled_width = resolution.ScaleUp(info.size.width);
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
std::unique_ptr<ImageView>& blit_view = scale_up ? scale_view : normal_view;
std::unique_ptr<Framebuffer>& blit_framebuffer =
scale_up ? scale_framebuffer : normal_framebuffer;
std::optional<Framebuffer>& blit_framebuffer = scale_up ? scale_framebuffer : normal_framebuffer;
if (!blit_view) {
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
blit_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
@ -2060,11 +2058,11 @@ bool Image::BlitScaleHelper(bool scale_up) {
const u32 dst_height = scale_up ? scaled_height : info.size.height;
const Region2D src_region{
.start = {0, 0},
.end = {static_cast<s32>(src_width), static_cast<s32>(src_height)},
.end = {s32(src_width), s32(src_height)},
};
const Region2D dst_region{
.start = {0, 0},
.end = {static_cast<s32>(dst_width), static_cast<s32>(dst_height)},
.end = {s32(dst_width), s32(dst_height)},
};
const VkExtent2D extent{
.width = (std::max)(scaled_width, info.size.width),
@ -2073,21 +2071,15 @@ bool Image::BlitScaleHelper(bool scale_up) {
auto* view_ptr = blit_view.get();
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
if (!blit_framebuffer) {
blit_framebuffer =
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent, scale_up);
}
runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), *blit_view, dst_region,
src_region, operation, BLIT_OPERATION);
if (!blit_framebuffer)
blit_framebuffer.emplace(*runtime, view_ptr, nullptr, extent, scale_up);
runtime->blit_image_helper.BlitColor(&*blit_framebuffer, *blit_view,
dst_region, src_region, operation, BLIT_OPERATION);
} else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
if (!blit_framebuffer) {
blit_framebuffer =
std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent, scale_up);
}
runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), *blit_view,
dst_region, src_region, operation,
BLIT_OPERATION);
if (!blit_framebuffer)
blit_framebuffer.emplace(*runtime, nullptr, view_ptr, extent, scale_up);
runtime->blit_image_helper.BlitDepthStencil(&*blit_framebuffer, *blit_view,
dst_region, src_region, operation, BLIT_OPERATION);
} else {
// TODO: Use helper blits where applicable
flags &= ~ImageFlagBits::Rescaled;
@ -2200,9 +2192,9 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
}
}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image, const SlotVector<Image>& slot_imgs)
: ImageView{runtime, info, image_id_, image} {
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image, const SlotVector<Image>& slot_imgs)
: ImageView{runtime, info, image_id_, image}
{
slot_images = &slot_imgs;
}
@ -2267,33 +2259,25 @@ VkImageView ImageView::ColorView() {
VkImageView ImageView::StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format) {
if (!image_handle) {
return VK_NULL_HANDLE;
}
if (image_format == Shader::ImageFormat::Typeless) {
return Handle(texture_type);
}
const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
image_format == Shader::ImageFormat::R16_SINT};
if (!storage_views) {
storage_views = std::make_unique<StorageViews>();
}
auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds};
auto& view{views[static_cast<size_t>(texture_type)]};
if (view) {
if (image_handle) {
if (image_format == Shader::ImageFormat::Typeless) {
return Handle(texture_type);
}
const bool is_signed = image_format == Shader::ImageFormat::R8_SINT
|| image_format == Shader::ImageFormat::R16_SINT;
if (!storage_views)
storage_views.emplace();
auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds};
auto& view{views[size_t(texture_type)]};
if (!view)
view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT);
return *view;
}
view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT);
return *view;
return VK_NULL_HANDLE;
}
bool ImageView::IsRescaled() const noexcept {
if (!slot_images) {
return false;
}
const auto& slots = *slot_images;
const auto& src_image = slots[image_id];
return src_image.IsRescaled();
return (*slot_images)[image_id].IsRescaled();
}
vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) {

View file

@ -133,7 +133,7 @@ public:
vk::Buffer swizzle_table_buffer;
VkDeviceSize swizzle_table_size = 0;
std::unique_ptr<MSAACopyPass> msaa_copy_pass;
std::optional<MSAACopyPass> msaa_copy_pass;
const Settings::ResolutionScalingInfo& resolution;
std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;
@ -141,6 +141,89 @@ public:
std::array<vk::Buffer, indexing_slots> buffers{};
};
/// Bundles a vk::Framebuffer handle with the render pass, render area, sample count and
/// per-attachment image metadata that describe it.
///
/// The type is move-only (copying is deleted, moving is defaulted).
/// It is declared ahead of Image because Image stores std::optional<Framebuffer> members,
/// and std::optional needs the contained type to be complete.
class Framebuffer {
public:
/// Constructs from a fixed-size span of NUM_RT color image views, a depth view and a
/// render-targets key. The definition is not part of this header.
explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
/// Constructs from a single color view and/or depth view with an explicit extent.
/// Callers pass nullptr for whichever of the two attachments is unused.
explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled);
~Framebuffer();
// Non-copyable; ownership of the framebuffer handle can only be transferred by move.
Framebuffer(const Framebuffer&) = delete;
Framebuffer& operator=(const Framebuffer&) = delete;
Framebuffer(Framebuffer&&) = default;
Framebuffer& operator=(Framebuffer&&) = default;
/// (Re)creation entry point taking the same attachment arguments as the span constructor.
/// NOTE(review): presumably fills in the private state below; confirm against the definition.
void CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer,
bool is_rescaled = false);
/// Returns the raw VkFramebuffer held by the vk::Framebuffer wrapper.
[[nodiscard]] VkFramebuffer Handle() const noexcept {
return *framebuffer;
}
/// Returns the stored render pass handle.
[[nodiscard]] VkRenderPass RenderPass() const noexcept {
return renderpass;
}
/// Returns the stored render area extent.
[[nodiscard]] VkExtent2D RenderArea() const noexcept {
return render_area;
}
/// Returns the stored sample count (defaults to VK_SAMPLE_COUNT_1_BIT).
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
return samples;
}
/// Returns the stored number of color buffers.
[[nodiscard]] u32 NumColorBuffers() const noexcept {
return num_color_buffers;
}
/// Returns the stored number of images; presumably the count of valid entries in
/// Images()/ImageRanges() -- TODO confirm.
[[nodiscard]] u32 NumImages() const noexcept {
return num_images;
}
/// Returns the fixed array of 9 image handles.
[[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
return images;
}
/// Returns the fixed array of 9 subresource ranges, parallel to Images().
[[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
return image_ranges;
}
/// Reports whether the image mapped to render target `index` has the color aspect bit set.
/// `index` is translated through rt_map (unchecked operator[]) before indexing image_ranges.
/// NOTE: std::array::at throws std::out_of_range on a bad slot; because this function is
/// noexcept, such a throw would call std::terminate rather than propagate.
[[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
}
/// Returns the stored has_depth flag.
[[nodiscard]] bool HasAspectDepthBit() const noexcept {
return has_depth;
}
/// Returns the stored has_stencil flag.
[[nodiscard]] bool HasAspectStencilBit() const noexcept {
return has_stencil;
}
/// Returns the stored is_rescaled flag.
[[nodiscard]] bool IsRescaled() const noexcept {
return is_rescaled;
}
private:
vk::Framebuffer framebuffer;
VkRenderPass renderpass{};
VkExtent2D render_area{};
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
u32 num_color_buffers = 0;
u32 num_images = 0;
// 9 slots: presumably NUM_RT color attachments plus one depth/stencil -- TODO confirm.
std::array<VkImage, 9> images{};
std::array<VkImageSubresourceRange, 9> image_ranges{};
// Maps a render-target index to a slot in images/image_ranges (see HasAspectColorBit).
std::array<size_t, NUM_RT> rt_map{};
bool has_depth{};
bool has_stencil{};
bool is_rescaled{};
};
class Image : public VideoCommon::ImageBase {
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
@ -226,10 +309,9 @@ private:
VkImageAspectFlags aspect_mask = 0;
bool initialized = false;
std::unique_ptr<Framebuffer> scale_framebuffer;
std::optional<Framebuffer> scale_framebuffer;
std::optional<Framebuffer> normal_framebuffer;
std::unique_ptr<ImageView> scale_view;
std::unique_ptr<Framebuffer> normal_framebuffer;
std::unique_ptr<ImageView> normal_view;
};
@ -297,7 +379,7 @@ private:
const SlotVector<Image>* slot_images = nullptr;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
std::unique_ptr<StorageViews> storage_views;
std::optional<StorageViews> storage_views;
vk::ImageView depth_view;
vk::ImageView stencil_view;
vk::ImageView color_view;
@ -331,89 +413,6 @@ private:
vk::Sampler sampler_default_anisotropy;
};
/// Bundles a vk::Framebuffer handle with the render pass, render area, sample count and
/// per-attachment image metadata that describe it.
///
/// The type is move-only (copying is deleted, moving is defaulted).
class Framebuffer {
public:
/// Constructs from a fixed-size span of NUM_RT color image views, a depth view and a
/// render-targets key. The definition is not part of this header.
explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
/// Constructs from a single color view and/or depth view with an explicit extent.
explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled);
~Framebuffer();
// Non-copyable; ownership of the framebuffer handle can only be transferred by move.
Framebuffer(const Framebuffer&) = delete;
Framebuffer& operator=(const Framebuffer&) = delete;
Framebuffer(Framebuffer&&) = default;
Framebuffer& operator=(Framebuffer&&) = default;
/// (Re)creation entry point taking the same attachment arguments as the span constructor.
/// NOTE(review): presumably fills in the private state below; confirm against the definition.
void CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer,
bool is_rescaled = false);
/// Returns the raw VkFramebuffer held by the vk::Framebuffer wrapper.
[[nodiscard]] VkFramebuffer Handle() const noexcept {
return *framebuffer;
}
/// Returns the stored render pass handle.
[[nodiscard]] VkRenderPass RenderPass() const noexcept {
return renderpass;
}
/// Returns the stored render area extent.
[[nodiscard]] VkExtent2D RenderArea() const noexcept {
return render_area;
}
/// Returns the stored sample count (defaults to VK_SAMPLE_COUNT_1_BIT).
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
return samples;
}
/// Returns the stored number of color buffers.
[[nodiscard]] u32 NumColorBuffers() const noexcept {
return num_color_buffers;
}
/// Returns the stored number of images; presumably the count of valid entries in
/// Images()/ImageRanges() -- TODO confirm.
[[nodiscard]] u32 NumImages() const noexcept {
return num_images;
}
/// Returns the fixed array of 9 image handles.
[[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
return images;
}
/// Returns the fixed array of 9 subresource ranges, parallel to Images().
[[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
return image_ranges;
}
/// Reports whether the image mapped to render target `index` has the color aspect bit set.
/// `index` is translated through rt_map (unchecked operator[]) before indexing image_ranges.
/// NOTE: std::array::at throws std::out_of_range on a bad slot; because this function is
/// noexcept, such a throw would call std::terminate rather than propagate.
[[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
}
/// Returns the stored has_depth flag.
[[nodiscard]] bool HasAspectDepthBit() const noexcept {
return has_depth;
}
/// Returns the stored has_stencil flag.
[[nodiscard]] bool HasAspectStencilBit() const noexcept {
return has_stencil;
}
/// Returns the stored is_rescaled flag.
[[nodiscard]] bool IsRescaled() const noexcept {
return is_rescaled;
}
private:
vk::Framebuffer framebuffer;
VkRenderPass renderpass{};
VkExtent2D render_area{};
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
u32 num_color_buffers = 0;
u32 num_images = 0;
// 9 slots: presumably NUM_RT color attachments plus one depth/stencil -- TODO confirm.
std::array<VkImage, 9> images{};
std::array<VkImageSubresourceRange, 9> image_ranges{};
// Maps a render-target index to a slot in images/image_ranges (see HasAspectColorBit).
std::array<size_t, NUM_RT> rt_map{};
bool has_depth{};
bool has_stencil{};
bool is_rescaled{};
};
struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = false;