mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-13 07:09:00 +02:00
[buffer_cache] Add batching support for memory tracker updates (#3288)
[buffer_cache] Add batching/coalescing of ranges in WordManager to reduce the number of per-page calls to UpdatePagesCachedCount. Also adds a test verifying that FlushCachedWrites coalesces ranges (reducing the number of UpdatePagesCachedCount calls) and registers each call so it can be inspected. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3288 Reviewed-by: Maufeat <sahyno1996@gmail.com> Reviewed-by: DraVee <dravee@eden-emu.dev> Co-authored-by: CamilleLaVey <camillelavey99@gmail.com> Co-committed-by: CamilleLaVey <camillelavey99@gmail.com>
This commit is contained in:
parent
51cc1bc6be
commit
1a9b4b37e1
4 changed files with 155 additions and 6 deletions
|
|
@ -11,6 +11,7 @@
|
|||
#include <algorithm>
#include <limits>
#include <span>
#include <utility>
#include <vector>

#include "common/alignment.h"
#include "common/common_funcs.h"
|
||||
|
|
@ -256,9 +257,10 @@ public:
|
|||
std::span<u64> state_words = words.template Span<type>();
|
||||
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
|
||||
[[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>();
|
||||
std::vector<std::pair<VAddr, u64>> ranges;
|
||||
IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
|
||||
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
|
||||
NotifyRasterizer<!enable>(index, untracked_words[index], mask);
|
||||
CollectChangedRanges<(!enable)>(index, untracked_words[index], mask, ranges);
|
||||
}
|
||||
if constexpr (enable) {
|
||||
state_words[index] |= mask;
|
||||
|
|
@ -279,6 +281,9 @@ public:
|
|||
}
|
||||
}
|
||||
});
|
||||
if (!ranges.empty()) {
|
||||
ApplyCollectedRanges(ranges, (!enable) ? 1 : -1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -304,6 +309,7 @@ public:
|
|||
func(cpu_addr + pending_offset * BYTES_PER_PAGE,
|
||||
(pending_pointer - pending_offset) * BYTES_PER_PAGE);
|
||||
};
|
||||
std::vector<std::pair<VAddr, u64>> ranges;
|
||||
IterateWords(offset, size, [&](size_t index, u64 mask) {
|
||||
if constexpr (type == Type::GPU) {
|
||||
mask &= ~untracked_words[index];
|
||||
|
|
@ -311,7 +317,7 @@ public:
|
|||
const u64 word = state_words[index] & mask;
|
||||
if constexpr (clear) {
|
||||
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
|
||||
NotifyRasterizer<true>(index, untracked_words[index], mask);
|
||||
CollectChangedRanges<true>(index, untracked_words[index], mask, ranges);
|
||||
}
|
||||
state_words[index] &= ~mask;
|
||||
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
|
||||
|
|
@ -343,6 +349,9 @@ public:
|
|||
if (pending) {
|
||||
release();
|
||||
}
|
||||
if (!ranges.empty()) {
|
||||
ApplyCollectedRanges(ranges, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -425,13 +434,17 @@ public:
|
|||
u64* const cached_words = Array<Type::CachedCPU>();
|
||||
u64* const untracked_words = Array<Type::Untracked>();
|
||||
u64* const cpu_words = Array<Type::CPU>();
|
||||
std::vector<std::pair<VAddr, u64>> ranges;
|
||||
for (u64 word_index = 0; word_index < num_words; ++word_index) {
|
||||
const u64 cached_bits = cached_words[word_index];
|
||||
NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
|
||||
CollectChangedRanges<false>(word_index, untracked_words[word_index], cached_bits, ranges);
|
||||
untracked_words[word_index] |= cached_bits;
|
||||
cpu_words[word_index] |= cached_bits;
|
||||
cached_words[word_index] = 0;
|
||||
}
|
||||
if (!ranges.empty()) {
|
||||
ApplyCollectedRanges(ranges, -1);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
@ -470,6 +483,40 @@ private:
|
|||
*
|
||||
* @tparam add_to_tracker True when the tracker should start tracking the new pages
|
||||
*/
|
||||
template <bool add_to_tracker>
|
||||
void CollectChangedRanges(u64 word_index, u64 current_bits, u64 new_bits,
|
||||
std::vector<std::pair<VAddr, u64>>& out_ranges) const {
|
||||
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
|
||||
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
|
||||
IteratePages(changed_bits, [&](size_t offset, size_t size) {
|
||||
out_ranges.emplace_back(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE);
|
||||
});
|
||||
}
|
||||
|
||||
void ApplyCollectedRanges(std::vector<std::pair<VAddr, u64>>& ranges, int delta) const {
|
||||
if (ranges.empty()) return;
|
||||
std::sort(ranges.begin(), ranges.end(),
|
||||
[](const auto& a, const auto& b) { return a.first < b.first; });
|
||||
// Coalesce adjacent/contiguous ranges
|
||||
std::vector<std::pair<VAddr, size_t>> coalesced;
|
||||
coalesced.reserve(ranges.size());
|
||||
VAddr cur_addr = ranges[0].first;
|
||||
size_t cur_size = static_cast<size_t>(ranges[0].second);
|
||||
for (size_t i = 1; i < ranges.size(); ++i) {
|
||||
if (cur_addr + cur_size == ranges[i].first) {
|
||||
cur_size += static_cast<size_t>(ranges[i].second);
|
||||
} else {
|
||||
coalesced.emplace_back(cur_addr, cur_size);
|
||||
cur_addr = ranges[i].first;
|
||||
cur_size = static_cast<size_t>(ranges[i].second);
|
||||
}
|
||||
}
|
||||
coalesced.emplace_back(cur_addr, cur_size);
|
||||
// Use batch API to reduce lock acquisitions and contention.
|
||||
tracker->UpdatePagesCachedBatch(coalesced, delta);
|
||||
ranges.clear();
|
||||
}
|
||||
|
||||
template <bool add_to_tracker>
|
||||
void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
|
||||
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue