VideoCommon: Don't create mipmap vector in TextureInfo

The TextureInfo constructor creates a vector of MipLevels. This could be
good for performance if the MipLevels of each TextureInfo were accessed
very often, but that's not the case. Dolphin creates thousands of
TextureInfos per second whose mipmap levels are never accessed, because
the lookup hits the texture cache. In the uncommon case of a texture
cache miss, the mipmap levels only get looped through once.

To make the common case of texture cache hits as fast as possible, let's
not create a vector in the TextureInfo constructor. This commit
implements a custom iterator for MipLevels instead.

In my testing on the Death Star level of Rogue Squadron 2, this speeds
up TextureInfo::FromStage by 200%, giving an overall emulation speedup
of a bit over 1%. Results on the Hoth level are even better, with
TextureInfo::FromStage being close to 300% faster and overall emulation
being over 4% faster. (Single core, no GPU texture decoding.)
This commit is contained in:
JosJuice
2026-01-17 17:43:06 +01:00
parent 2aee998a8e
commit f7b4d2738b
3 changed files with 152 additions and 79 deletions

View File

@@ -1721,32 +1721,36 @@ RcTcacheEntry TextureCacheBase::CreateTextureEntry(
dst_buffer += decoded_texture_size;
}
for (u32 level = 1; level != texLevels; ++level)
for (const auto& mip_level : texture_info.GetMipMapLevels())
{
auto mip_level = texture_info.GetMipMapLevel(level - 1);
if (!mip_level)
if (!mip_level.IsDataValid())
{
ERROR_LOG_FMT(VIDEO, "Trying to use an invalid mipmap address {:#010x}",
texture_info.GetRawAddress());
continue;
}
if (!decode_on_gpu ||
!DecodeTextureOnGPU(entry, level, mip_level->GetData(), mip_level->GetTextureSize(),
texture_info.GetTextureFormat(), mip_level->GetRawWidth(),
mip_level->GetRawHeight(), mip_level->GetExpandedWidth(),
mip_level->GetExpandedHeight(),
!DecodeTextureOnGPU(entry, mip_level.GetLevel(), mip_level.GetData(),
mip_level.GetTextureSize(), texture_info.GetTextureFormat(),
mip_level.GetRawWidth(), mip_level.GetRawHeight(),
mip_level.GetExpandedWidth(), mip_level.GetExpandedHeight(),
creation_info.bytes_per_block *
(mip_level->GetExpandedWidth() / texture_info.GetBlockWidth()),
(mip_level.GetExpandedWidth() / texture_info.GetBlockWidth()),
texture_info.GetTlutAddress(), texture_info.GetTlutFormat()))
{
// No need to call CheckTempSize here, as the whole buffer is preallocated at the beginning
const u32 decoded_mip_size =
mip_level->GetExpandedWidth() * sizeof(u32) * mip_level->GetExpandedHeight();
TexDecoder_Decode(dst_buffer, mip_level->GetData(), mip_level->GetExpandedWidth(),
mip_level->GetExpandedHeight(), texture_info.GetTextureFormat(),
mip_level.GetExpandedWidth() * sizeof(u32) * mip_level.GetExpandedHeight();
TexDecoder_Decode(dst_buffer, mip_level.GetData(), mip_level.GetExpandedWidth(),
mip_level.GetExpandedHeight(), texture_info.GetTextureFormat(),
texture_info.GetTlutAddress(), texture_info.GetTlutFormat());
entry->texture->Load(level, mip_level->GetRawWidth(), mip_level->GetRawHeight(),
mip_level->GetExpandedWidth(), dst_buffer, decoded_mip_size);
entry->texture->Load(mip_level.GetLevel(), mip_level.GetRawWidth(),
mip_level.GetRawHeight(), mip_level.GetExpandedWidth(), dst_buffer,
decoded_mip_size);
arbitrary_mip_detector.AddLevel(mip_level->GetRawWidth(), mip_level->GetRawHeight(),
mip_level->GetExpandedWidth(), dst_buffer);
arbitrary_mip_detector.AddLevel(mip_level.GetRawWidth(), mip_level.GetRawHeight(),
mip_level.GetExpandedWidth(), dst_buffer);
dst_buffer += decoded_mip_size;
}