mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-06-06 04:47:12 +02:00
[crypto] Rework AES CTR/XTS streaming and squash heap churn (#2782)
AES updates: Replaced heap churn with stack scratch buffers; tail handling now stays in place, with no more recursive transcode detours. CTR/XTS modes read in larger, aligned chunks and still handle odd offsets cleanly. XTS prefetches a few sectors ahead to reduce extra reads. The AesCtrStorage writer now uses the pooled buffer properly: one stack slab, chunk forward, bump the counter, repeat. Result: less malloc noise and fewer watchdog spikes at startup (though mbedtls still sets the pace). This should make loading slightly faster than before. Make sure to test. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2782 Reviewed-by: MaranBr <maranbr@eden-emu.dev> Reviewed-by: crueter <crueter@eden-emu.dev> Co-authored-by: godpow <thesaviorsrule@yahoo.com> Co-committed-by: godpow <thesaviorsrule@yahoo.com>
This commit is contained in:
parent
73ebf59af7
commit
41e15e95b1
5 changed files with 167 additions and 115 deletions
|
|
@ -4,8 +4,9 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <vector>
|
#include <cstring>
|
||||||
#include <mbedtls/cipher.h>
|
#include <mbedtls/cipher.h>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
|
@ -15,6 +16,7 @@
|
||||||
namespace Core::Crypto {
|
namespace Core::Crypto {
|
||||||
namespace {
|
namespace {
|
||||||
using NintendoTweak = std::array<u8, 16>;
|
using NintendoTweak = std::array<u8, 16>;
|
||||||
|
constexpr std::size_t AesBlockBytes = 16;
|
||||||
|
|
||||||
NintendoTweak CalculateNintendoTweak(std::size_t sector_id) {
|
NintendoTweak CalculateNintendoTweak(std::size_t sector_id) {
|
||||||
NintendoTweak out{};
|
NintendoTweak out{};
|
||||||
|
|
@ -75,39 +77,51 @@ void AESCipher<Key, KeySize>::Transcode(const u8* src, std::size_t size, u8* des
|
||||||
|
|
||||||
mbedtls_cipher_reset(context);
|
mbedtls_cipher_reset(context);
|
||||||
|
|
||||||
// Only ECB strictly requires block sized chunks.
|
if (size == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
const auto mode = mbedtls_cipher_get_cipher_mode(context);
|
||||||
std::size_t written = 0;
|
std::size_t written = 0;
|
||||||
if (mbedtls_cipher_get_cipher_mode(context) != MBEDTLS_MODE_ECB) {
|
|
||||||
mbedtls_cipher_update(context, src, size, dest, &written);
|
if (mode != MBEDTLS_MODE_ECB) {
|
||||||
if (written != size)
|
const int ret = mbedtls_cipher_update(context, src, size, dest, &written);
|
||||||
|
ASSERT(ret == 0);
|
||||||
|
if (written != size) {
|
||||||
LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", size, written);
|
LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", size, written);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ECB path: operate in block sized chunks and mirror previous behavior.
|
|
||||||
const auto block_size = mbedtls_cipher_get_block_size(context);
|
const auto block_size = mbedtls_cipher_get_block_size(context);
|
||||||
if (size < block_size) {
|
ASSERT(block_size <= AesBlockBytes);
|
||||||
std::vector<u8> block(block_size);
|
|
||||||
std::memcpy(block.data(), src, size);
|
|
||||||
Transcode(block.data(), block.size(), block.data(), op);
|
|
||||||
std::memcpy(dest, block.data(), size);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (std::size_t offset = 0; offset < size; offset += block_size) {
|
const std::size_t whole_block_bytes = size - (size % block_size);
|
||||||
const auto length = std::min<std::size_t>(block_size, size - offset);
|
if (whole_block_bytes != 0) {
|
||||||
mbedtls_cipher_update(context, src + offset, length, dest + offset, &written);
|
const int ret = mbedtls_cipher_update(context, src, whole_block_bytes, dest, &written);
|
||||||
if (written != length) {
|
ASSERT(ret == 0);
|
||||||
if (length < block_size) {
|
if (written != whole_block_bytes) {
|
||||||
std::vector<u8> block(block_size);
|
LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.",
|
||||||
std::memcpy(block.data(), src + offset, length);
|
whole_block_bytes, written);
|
||||||
Transcode(block.data(), block.size(), block.data(), op);
|
|
||||||
std::memcpy(dest + offset, block.data(), length);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", length, written);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const std::size_t tail = size - whole_block_bytes;
|
||||||
|
if (tail == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
std::array<u8, AesBlockBytes> tail_buffer{};
|
||||||
|
std::memcpy(tail_buffer.data(), src + whole_block_bytes, tail);
|
||||||
|
|
||||||
|
std::size_t tail_written = 0;
|
||||||
|
const int ret = mbedtls_cipher_update(context, tail_buffer.data(), block_size, tail_buffer.data(),
|
||||||
|
&tail_written);
|
||||||
|
ASSERT(ret == 0);
|
||||||
|
if (tail_written != block_size) {
|
||||||
|
LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", block_size,
|
||||||
|
tail_written);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::memcpy(dest + whole_block_bytes, tail_buffer.data(), tail);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Key, std::size_t KeySize>
|
template <typename Key, std::size_t KeySize>
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include "core/crypto/ctr_encryption_layer.h"
|
#include "core/crypto/ctr_encryption_layer.h"
|
||||||
|
|
||||||
|
|
@ -18,35 +19,84 @@ std::size_t CTREncryptionLayer::Read(u8* data, std::size_t length, std::size_t o
|
||||||
if (length == 0)
|
if (length == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
constexpr std::size_t BlockSize = 0x10;
|
||||||
|
constexpr std::size_t MaxChunkSize = 0x10000;
|
||||||
|
|
||||||
std::size_t total_read = 0;
|
std::size_t total_read = 0;
|
||||||
// Handle an initial misaligned portion if needed.
|
auto* out = data;
|
||||||
if (auto const sector_offset = offset & 0xF; sector_offset != 0) {
|
std::size_t remaining = length;
|
||||||
const std::size_t aligned_off = offset - sector_offset;
|
std::size_t current_offset = offset;
|
||||||
std::array<u8, 0x10> block{};
|
|
||||||
if (auto const got = base->Read(block.data(), block.size(), aligned_off); got != 0) {
|
const auto read_exact = [this](u8* dst, std::size_t bytes, std::size_t src_offset) {
|
||||||
UpdateIV(base_offset + aligned_off);
|
std::size_t filled = 0;
|
||||||
cipher.Transcode(block.data(), got, block.data(), Op::Decrypt);
|
while (filled < bytes) {
|
||||||
auto const to_copy = std::min<std::size_t>(length, got > sector_offset ? got - sector_offset : 0);
|
const std::size_t got = base->Read(dst + filled, bytes - filled, src_offset + filled);
|
||||||
if (to_copy > 0) {
|
if (got == 0)
|
||||||
std::memcpy(data, block.data() + sector_offset, to_copy);
|
break;
|
||||||
data += to_copy;
|
filled += got;
|
||||||
offset += to_copy;
|
|
||||||
length -= to_copy;
|
|
||||||
total_read += to_copy;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
return filled;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (const std::size_t intra_block = current_offset & (BlockSize - 1); intra_block != 0) {
|
||||||
|
std::array<u8, BlockSize> block{};
|
||||||
|
const std::size_t aligned_offset = current_offset - intra_block;
|
||||||
|
const std::size_t got = read_exact(block.data(), BlockSize, aligned_offset);
|
||||||
|
if (got <= intra_block)
|
||||||
|
return total_read;
|
||||||
|
|
||||||
|
UpdateIV(base_offset + aligned_offset);
|
||||||
|
cipher.Transcode(block.data(), got, block.data(), Op::Decrypt);
|
||||||
|
|
||||||
|
const std::size_t available = got - intra_block;
|
||||||
|
const std::size_t to_copy = std::min<std::size_t>(remaining, available);
|
||||||
|
std::memcpy(out, block.data() + intra_block, to_copy);
|
||||||
|
|
||||||
|
out += to_copy;
|
||||||
|
current_offset += to_copy;
|
||||||
|
remaining -= to_copy;
|
||||||
|
total_read += to_copy;
|
||||||
|
|
||||||
|
if (to_copy != available)
|
||||||
|
return total_read;
|
||||||
}
|
}
|
||||||
if (length > 0) {
|
|
||||||
// Now aligned to 0x10
|
while (remaining >= BlockSize) {
|
||||||
UpdateIV(base_offset + offset);
|
const std::size_t chunk_request = std::min<std::size_t>(remaining, MaxChunkSize);
|
||||||
const std::size_t got = base->Read(data, length, offset);
|
const std::size_t aligned_request = chunk_request - (chunk_request % BlockSize);
|
||||||
if (got > 0) {
|
if (aligned_request == 0)
|
||||||
cipher.Transcode(data, got, data, Op::Decrypt);
|
break;
|
||||||
total_read += got;
|
|
||||||
}
|
const std::size_t got = read_exact(out, aligned_request, current_offset);
|
||||||
|
if (got == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
UpdateIV(base_offset + current_offset);
|
||||||
|
cipher.Transcode(out, got, out, Op::Decrypt);
|
||||||
|
|
||||||
|
out += got;
|
||||||
|
current_offset += got;
|
||||||
|
remaining -= got;
|
||||||
|
total_read += got;
|
||||||
|
|
||||||
|
if (got < aligned_request)
|
||||||
|
return total_read;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (remaining > 0) {
|
||||||
|
std::array<u8, BlockSize> block{};
|
||||||
|
const std::size_t got = read_exact(block.data(), BlockSize, current_offset);
|
||||||
|
if (got == 0)
|
||||||
|
return total_read;
|
||||||
|
|
||||||
|
UpdateIV(base_offset + current_offset);
|
||||||
|
cipher.Transcode(block.data(), got, block.data(), Op::Decrypt);
|
||||||
|
|
||||||
|
const std::size_t to_copy = std::min<std::size_t>(remaining, got);
|
||||||
|
std::memcpy(out, block.data(), to_copy);
|
||||||
|
total_read += to_copy;
|
||||||
|
}
|
||||||
|
|
||||||
return total_read;
|
return total_read;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,67 +20,49 @@ std::size_t XTSEncryptionLayer::Read(u8* data, std::size_t length, std::size_t o
|
||||||
if (length == 0)
|
if (length == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
constexpr std::size_t PrefetchSectors = 4;
|
||||||
|
|
||||||
|
auto* out = data;
|
||||||
|
std::size_t remaining = length;
|
||||||
|
std::size_t current_offset = offset;
|
||||||
std::size_t total_read = 0;
|
std::size_t total_read = 0;
|
||||||
// Handle initial unaligned part within a sector.
|
|
||||||
if (auto const sector_offset = offset % XTS_SECTOR_SIZE; sector_offset != 0) {
|
|
||||||
const std::size_t aligned_off = offset - sector_offset;
|
|
||||||
std::array<u8, XTS_SECTOR_SIZE> block{};
|
|
||||||
if (auto const got = base->Read(block.data(), XTS_SECTOR_SIZE, aligned_off); got > 0) {
|
|
||||||
if (got < XTS_SECTOR_SIZE)
|
|
||||||
std::memset(block.data() + got, 0, XTS_SECTOR_SIZE - got);
|
|
||||||
cipher.XTSTranscode(block.data(), XTS_SECTOR_SIZE, block.data(), aligned_off / XTS_SECTOR_SIZE,
|
|
||||||
XTS_SECTOR_SIZE, Op::Decrypt);
|
|
||||||
|
|
||||||
auto const to_copy = std::min<std::size_t>(length, got > sector_offset ? got - sector_offset : 0);
|
std::array<u8, XTS_SECTOR_SIZE> sector{};
|
||||||
if (to_copy > 0) {
|
|
||||||
std::memcpy(data, block.data() + sector_offset, to_copy);
|
|
||||||
data += to_copy;
|
|
||||||
offset += to_copy;
|
|
||||||
length -= to_copy;
|
|
||||||
total_read += to_copy;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length > 0) {
|
while (remaining > 0) {
|
||||||
// Process aligned middle inplace, in sector sized multiples.
|
const std::size_t sector_index = current_offset / XTS_SECTOR_SIZE;
|
||||||
while (length >= XTS_SECTOR_SIZE) {
|
const std::size_t sector_offset = current_offset % XTS_SECTOR_SIZE;
|
||||||
const std::size_t req = (length / XTS_SECTOR_SIZE) * XTS_SECTOR_SIZE;
|
|
||||||
const std::size_t got = base->Read(data, req, offset);
|
const std::size_t sectors_to_read = std::min<std::size_t>(PrefetchSectors,
|
||||||
if (got == 0) {
|
(remaining + sector_offset +
|
||||||
|
XTS_SECTOR_SIZE - 1) /
|
||||||
|
XTS_SECTOR_SIZE);
|
||||||
|
|
||||||
|
for (std::size_t s = 0; s < sectors_to_read && remaining > 0; ++s) {
|
||||||
|
const std::size_t index = sector_index + s;
|
||||||
|
const std::size_t read_offset = index * XTS_SECTOR_SIZE;
|
||||||
|
const std::size_t got = base->Read(sector.data(), XTS_SECTOR_SIZE, read_offset);
|
||||||
|
if (got == 0)
|
||||||
return total_read;
|
return total_read;
|
||||||
}
|
|
||||||
const std::size_t got_rounded = got - (got % XTS_SECTOR_SIZE);
|
if (got < XTS_SECTOR_SIZE)
|
||||||
if (got_rounded > 0) {
|
std::memset(sector.data() + got, 0, XTS_SECTOR_SIZE - got);
|
||||||
cipher.XTSTranscode(data, got_rounded, data, offset / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt);
|
|
||||||
data += got_rounded;
|
cipher.XTSTranscode(sector.data(), XTS_SECTOR_SIZE, sector.data(), index, XTS_SECTOR_SIZE,
|
||||||
offset += got_rounded;
|
Op::Decrypt);
|
||||||
length -= got_rounded;
|
|
||||||
total_read += got_rounded;
|
const std::size_t local_offset = (s == 0) ? sector_offset : 0;
|
||||||
}
|
const std::size_t available = XTS_SECTOR_SIZE - local_offset;
|
||||||
// If we didn't get a full sector next, break to handle tail.
|
const std::size_t to_copy = std::min<std::size_t>(available, remaining);
|
||||||
if (got_rounded != got) {
|
std::memcpy(out, sector.data() + local_offset, to_copy);
|
||||||
break;
|
|
||||||
}
|
out += to_copy;
|
||||||
}
|
current_offset += to_copy;
|
||||||
// Handle tail within a sector, if any.
|
remaining -= to_copy;
|
||||||
if (length > 0) {
|
total_read += to_copy;
|
||||||
std::array<u8, XTS_SECTOR_SIZE> block{};
|
|
||||||
const std::size_t got = base->Read(block.data(), XTS_SECTOR_SIZE, offset);
|
|
||||||
if (got > 0) {
|
|
||||||
if (got < XTS_SECTOR_SIZE) {
|
|
||||||
std::memset(block.data() + got, 0, XTS_SECTOR_SIZE - got);
|
|
||||||
}
|
|
||||||
cipher.XTSTranscode(block.data(), XTS_SECTOR_SIZE, block.data(),
|
|
||||||
offset / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt);
|
|
||||||
const std::size_t to_copy = std::min<std::size_t>(length, got);
|
|
||||||
std::memcpy(data, block.data(), to_copy);
|
|
||||||
total_read += to_copy;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return total_read;
|
return total_read;
|
||||||
}
|
}
|
||||||
} // namespace Core::Crypto
|
} // namespace Core::Crypto
|
||||||
|
|
|
||||||
|
|
@ -86,18 +86,21 @@ size_t AesCtrStorage::Write(const u8* buffer, size_t size, size_t offset) {
|
||||||
|
|
||||||
// Loop until all data is written using a pooled buffer residing on the stack (blocksize = 0x10)
|
// Loop until all data is written using a pooled buffer residing on the stack (blocksize = 0x10)
|
||||||
boost::container::static_vector<u8, BlockSize> pooled_buffer;
|
boost::container::static_vector<u8, BlockSize> pooled_buffer;
|
||||||
for (size_t remaining = size; remaining > 0; ) {
|
pooled_buffer.resize(BlockSize);
|
||||||
// Determine data we're writing and where.
|
|
||||||
auto const write_size = (std::min)(pooled_buffer.size(), remaining);
|
const u8* cur = buffer;
|
||||||
u8* write_buf = pooled_buffer.data();
|
size_t remaining = size;
|
||||||
|
size_t current_offset = offset;
|
||||||
|
|
||||||
|
while (remaining > 0) {
|
||||||
|
const size_t write_size = std::min<std::size_t>(pooled_buffer.size(), remaining);
|
||||||
|
|
||||||
// Encrypt the data and then write it.
|
|
||||||
m_cipher->SetIV(ctr);
|
m_cipher->SetIV(ctr);
|
||||||
m_cipher->Transcode(buffer, write_size, write_buf, Core::Crypto::Op::Encrypt);
|
m_cipher->Transcode(cur, write_size, pooled_buffer.data(), Core::Crypto::Op::Encrypt);
|
||||||
m_base_storage->Write(write_buf, write_size, offset);
|
m_base_storage->Write(pooled_buffer.data(), write_size, current_offset);
|
||||||
|
|
||||||
// Advance next write chunk
|
cur += write_size;
|
||||||
offset += write_size;
|
current_offset += write_size;
|
||||||
remaining -= write_size;
|
remaining -= write_size;
|
||||||
if (remaining > 0)
|
if (remaining > 0)
|
||||||
AddCounter(ctr.data(), IvSize, write_size / BlockSize);
|
AddCounter(ctr.data(), IvSize, write_size / BlockSize);
|
||||||
|
|
|
||||||
|
|
@ -65,10 +65,13 @@ size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const {
|
||||||
if ((offset % m_block_size) != 0) {
|
if ((offset % m_block_size) != 0) {
|
||||||
// Decrypt into our pooled stack buffer (max bound = NCA::XtsBlockSize)
|
// Decrypt into our pooled stack buffer (max bound = NCA::XtsBlockSize)
|
||||||
boost::container::static_vector<u8, NcaHeader::XtsBlockSize> tmp_buf;
|
boost::container::static_vector<u8, NcaHeader::XtsBlockSize> tmp_buf;
|
||||||
|
ASSERT(m_block_size <= tmp_buf.max_size());
|
||||||
|
tmp_buf.resize(m_block_size);
|
||||||
// Determine the size of the pre-data read.
|
// Determine the size of the pre-data read.
|
||||||
auto const skip_size = size_t(offset - Common::AlignDown(offset, m_block_size));
|
auto const skip_size = size_t(offset - Common::AlignDown(offset, m_block_size));
|
||||||
auto const data_size = (std::min)(size, m_block_size - skip_size);
|
auto const data_size = (std::min)(size, m_block_size - skip_size);
|
||||||
std::fill_n(tmp_buf.begin(), skip_size, u8{0});
|
if (skip_size > 0)
|
||||||
|
std::fill_n(tmp_buf.begin(), skip_size, u8{0});
|
||||||
std::memcpy(tmp_buf.data() + skip_size, buffer, data_size);
|
std::memcpy(tmp_buf.data() + skip_size, buffer, data_size);
|
||||||
m_cipher->SetIV(ctr);
|
m_cipher->SetIV(ctr);
|
||||||
m_cipher->Transcode(tmp_buf.data(), m_block_size, tmp_buf.data(), Core::Crypto::Op::Decrypt);
|
m_cipher->Transcode(tmp_buf.data(), m_block_size, tmp_buf.data(), Core::Crypto::Op::Decrypt);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue