// Review notes for the patch below. Only these notes and one comment typo were
// touched; the diff content itself is reproduced unchanged.
//
// Files touched: src/video_core/invalidation_accumulator.h,
// src/video_core/memory_manager.cpp, src/video_core/memory_manager.h.
//
// invalidation_accumulator.h
//   * Add() becomes noexcept and is rewritten as one nested conditional. A write
//     that already lies inside [start_address, start_address + accumulated_size)
//     is ignored. Otherwise address/size are adjusted with the atomicity masks and
//     the write either starts a run (start_address == 0), extends the current run
//     (address == end of the current run), or pushes the current run into
//     `buffer` and starts a new run.
//   * The has_collected and last_collection members are removed; the end of the
//     run is recomputed as start_address + accumulated_size, and "nothing
//     collected" is now represented by start_address == 0.
//   * Clear(), AnyAccumulated() and Callback() are replaced by InvalidateAll(f).
//     When start_address > 0 it calls f for every buffered range and for the
//     current run, clears the buffer, zeroes the run and returns true; otherwise
//     it returns false without calling f.
//
// memory_manager.cpp / memory_manager.h
//   * `accumulator` changes from a std::unique_ptr member to a by-value
//     VideoCommon::InvalidationAccumulator, so the header now includes
//     invalidation_accumulator.h instead of forward-declaring the class, and
//     call sites switch from `->` to `.`.
//   * Constructor initializer lists are reformatted; `accumulator{}` replaces the
//     std::make_unique call.
//   * FlushCaching() calls InvalidateAll and only runs InnerInvalidation and
//     page_stash2.clear() when it returns true.
//
// NOTE(review): with start_address == 0 doubling as the empty state, a run whose
// aligned start is 0 is skipped by InvalidateAll (start_address > 0 is false) and
// is overwritten by the next non-contained Add. The removed has_collected flag
// did not have this restriction. Confirm that GPU virtual address 0 can never be
// written through WriteBlockCached, or keep an explicit flag.
// NOTE(review): Add() is noexcept but calls buffer.emplace_back, which may
// allocate; an allocation failure would terminate instead of propagating.
// InvalidateAll() is noexcept too and invokes the caller's functor; whether the
// functor can throw is not visible here. Confirm both are intended.
// NOTE(review): template argument lists look stripped in this copy of the patch
// (`template` with no parameter list, `std::vector>`, `std::atomic`,
// `std::unique_ptr`, `std::make_unique()`); presumably an extraction artifact.
// Verify against the original patch before applying.
diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h index 2c2aaf7bb0..1f57d165a9 100644 --- a/src/video_core/invalidation_accumulator.h +++ b/src/video_core/invalidation_accumulator.h @@ -15,53 +15,37 @@ public: InvalidationAccumulator() = default; ~InvalidationAccumulator() = default; - void Add(GPUVAddr address, size_t size) { - const auto reset_values = [&]() { - if (has_collected) { + void Add(GPUVAddr address, size_t size) noexcept { + auto const end_address = start_address + accumulated_size; + if (!(address >= start_address && address + size <= end_address)) { + size = ((address + size + atomicity_size_mask) & atomicity_mask) - address; + address = address & atomicity_mask; + if (start_address == 0) { + start_address = address; + accumulated_size = size; + // will now have collected at this point + } else if (address != end_address) { buffer.emplace_back(start_address, accumulated_size); + start_address = address; + accumulated_size = size; + } else { + accumulated_size += size; } - start_address = address; - accumulated_size = size; - last_collection = start_address + size; - }; - if (address >= start_address && address + size <= last_collection) [[likely]] { - return; } - size = ((address + size + atomicity_size_mask) & atomicity_mask) - address; - address = address & atomicity_mask; - if (!has_collected) [[unlikely]] { - reset_values(); - has_collected = true; - return; - } - if (address != last_collection) [[unlikely]] { - reset_values(); - return; - } - accumulated_size += size; - last_collection += size; } - void Clear() { - buffer.clear(); - start_address = 0; - last_collection = 0; - has_collected = false; - } - - bool AnyAccumulated() const { - return has_collected; - } - - template - void Callback(Func&& func) { - if (!has_collected) { - return; - } - buffer.emplace_back(start_address, accumulated_size); - for (auto& [address, size] : buffer) { - func(address, size); + template + [[nodiscard]] bool 
InvalidateAll(F&& f) noexcept { + if (start_address > 0) { + for (auto [address, size] : buffer) + f(address, size); + f(start_address, accumulated_size); + buffer.clear(); + start_address = 0; + accumulated_size = 0; + return true; } + return false; } private: @@ -69,11 +53,9 @@ private: static constexpr size_t atomicity_size = 1ULL << atomicity_bits; static constexpr size_t atomicity_size_mask = atomicity_size - 1; static constexpr size_t atomicity_mask = ~atomicity_size_mask; - GPUVAddr start_address{}; - GPUVAddr last_collection{}; - size_t accumulated_size{}; - bool has_collected{}; std::vector> buffer; + GPUVAddr start_address = 0; + size_t accumulated_size = 0; }; } // namespace VideoCommon diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 41806bfd29..c583fbdf7d 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -26,16 +26,14 @@ using Tegra::Memory::GuestMemoryFlags; std::atomic MemoryManager::unique_identifier_generator{}; -MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_, - u64 address_space_bits_, GPUVAddr split_address_, u64 big_page_bits_, - u64 page_bits_) - : system{system_}, memory{memory_}, address_space_bits{address_space_bits_}, - split_address{split_address_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, - entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, - page_bits != big_page_bits ? 
page_bits : 0}, - kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( - 1, std::memory_order_acq_rel)}, - accumulator{std::make_unique()} { +MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_, u64 address_space_bits_, GPUVAddr split_address_, u64 big_page_bits_, u64 page_bits_) + : system{system_}, memory{memory_}, address_space_bits{address_space_bits_} + , split_address{split_address_}, page_bits{page_bits_}, big_page_bits{big_page_bits_} + , entries{}, big_entries{} + , page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits != big_page_bits ? page_bits : 0} + , kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} + , accumulator{} +{ address_space_size = 1ULL << address_space_bits; page_size = 1ULL << page_bits; page_mask = page_size - 1ULL; @@ -54,10 +52,9 @@ MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& entries.resize(page_table_size / 32, 0); } -MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, - GPUVAddr split_address_, u64 big_page_bits_, u64 page_bits_) - : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, split_address_, - big_page_bits_, page_bits_) {} +MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, GPUVAddr split_address_, u64 big_page_bits_, u64 page_bits_) + : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, split_address_, big_page_bits_, page_bits_) +{} MemoryManager::~MemoryManager() = default; @@ -469,10 +466,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf WriteBlockImpl(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); } -void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, - std::size_t size) { +void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const 
void* src_buffer, std::size_t size) { WriteBlockImpl(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); - accumulator->Add(gpu_dest_addr, size); + accumulator.Add(gpu_dest_addr, size); } void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, @@ -756,15 +752,13 @@ void MemoryManager::GetSubmappedRangeImpl( } void MemoryManager::FlushCaching() { - if (!accumulator->AnyAccumulated()) { - return; - } - accumulator->Callback([this](GPUVAddr addr, size_t size) { + // Flush from the invalidate accumulator + if (accumulator.InvalidateAll([this](GPUVAddr addr, size_t size) { GetSubmappedRangeImpl(addr, size, page_stash2); - }); - rasterizer->InnerInvalidation(VideoCommon::FixSmallVectorADL(page_stash2)); - page_stash2.clear(); - accumulator->Clear(); + })) { + rasterizer->InnerInvalidation(VideoCommon::FixSmallVectorADL(page_stash2)); + page_stash2.clear(); + } } const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 1be67b2d0c..c79c0e6ce0 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -18,6 +18,7 @@ #include "common/range_map.h" #include "common/scratch_buffer.h" #include "common/virtual_buffer.h" +#include "video_core/invalidation_accumulator.h" #include "video_core/cache_types.h" #include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/pte_kind.h" @@ -26,10 +27,6 @@ namespace VideoCore { class RasterizerInterface; } -namespace VideoCommon { -class InvalidationAccumulator; -} - namespace Core { class System; } // namespace Core @@ -249,7 +246,7 @@ private: static constexpr size_t continuous_bits = 64; const size_t unique_identifier; - std::unique_ptr accumulator; + VideoCommon::InvalidationAccumulator accumulator; static std::atomic unique_identifier_generator;