Compare commits

...

6 Commits

Author SHA1 Message Date
Stefanos Kornilios Mitsis Poiitidis
3a8d9b97ef fix texconv build 2025-02-24 12:18:42 +02:00
Stefanos Kornilios Mitsis Poiitidis
fb7ff2c9f7 Add re3EmergencyRemoveModel 2025-02-21 08:39:34 +02:00
Stefanos Kornilios Mitsis Poiitidis
bf14eefdc5 centralize relocation lists 2025-02-21 08:15:55 +02:00
Stefanos Kornilios Mitsis Poiitidis
3287d82e7f move InfoForTileCars, InfoForTilePeds to obj pool; they only live during init 2025-02-20 22:45:39 +02:00
Stefanos Kornilios Mitsis Poiitidis
63107a955c add tlsf; gradual relocation of obj heap 2025-02-20 22:06:43 +02:00
Stefanos Kornilios Mitsis Poiitidis
cd13bd6c19 make audio staging statics; add obj_alloc and use it on 3d models and animations 2025-02-19 20:38:59 +02:00
12 changed files with 1637 additions and 36 deletions

View File

@@ -81,7 +81,8 @@ OBJS_TEXCONV += \
../vendor/librw/src/d3d-x/d3d8render.texconv.o \
../vendor/librw/src/bmp.texconv.o \
../vendor/librw/src/png.texconv.o \
../vendor/librw/src/lodepng/lodepng.texconv.o
../vendor/librw/src/lodepng/lodepng.texconv.o \
../vendor/tlsf/tlsf.texconv.o
# Add compilation units to this list to explicitly compile them with
# -O3 optimizations, while the rest get the default (-Os) treatment
@@ -277,6 +278,9 @@ aud2adpcm: aud2adpcm.c
texconv: $(OBJS_TEXCONV) | pvrtex # You'll have to rebuild pvrtex manually if you change it
$(CXX) -o $@ $(OBJS_TEXCONV)
%.texconv.o: %.c
$(CXX) -c -O3 -g -MMD -MP -o $@ -I../vendor/koshle $(INCLUDE) -I../vendor/emu -I../vendor/crypto -I../vendor/TriStripper/include $(DEFINES) -DDC_TEXCONV -DDC_SIM -D_INC_WINDOWS $(TEXCONV_FLAGS) $<
%.texconv.o: %.cpp
$(CXX) -std=c++2a -c -O3 -g -MMD -MP -o $@ -I../vendor/koshle $(INCLUDE) -I../vendor/emu -I../vendor/crypto -I../vendor/TriStripper/include $(DEFINES) -DDC_TEXCONV -DDC_SIM -D_INC_WINDOWS $(TEXCONV_FLAGS) $<

View File

@@ -269,6 +269,7 @@ RE3_OBJS = \
../src/vmu/vmu.o \
../vendor/miniLZO/minilzo.o \
\
../vendor/tlsf/tlsf.o
# Excluded \
../src/extras/custompipes.o \
@@ -380,7 +381,9 @@ INCLUDE = \
\
-I../vendor/librw \
\
-I../vendor/miniLZO
-I../vendor/miniLZO \
\
-I../vendor/tlsf
DEFINES = -DRW_DC -DLIBRW $(if $(WITH_LOGGING),-DWITH_LOGGING) $(if $(WITH_DCLOAD),-DDC_CHDIR=/pc) \
$(if $(WITH_BEEPS),-DWITH_BEEPS)

View File

@@ -52,7 +52,7 @@ CAnimBlendHierarchy::RemoveAnimSequences(void)
numSequences = 0;
}
#ifdef USE_CUSTOM_ALLOCATOR
#if 0
void
CAnimBlendHierarchy::MoveMemory(bool onlyone)
{
@@ -61,4 +61,4 @@ CAnimBlendHierarchy::MoveMemory(bool onlyone)
if(sequences[i].MoveMemory() && onlyone)
return;
}
#endif
#endif

View File

@@ -1,3 +1,9 @@
#include <cstddef>
// Object-heap allocator entry points, implemented outside this file.
// obj_alloc: allocates `size` bytes; `storage` is the address of the pointer
// variable that will hold the result (this file passes the address of keyFrames).
void* obj_alloc(size_t size, void** storage);
// obj_free: releases a block previously returned by obj_alloc.
void obj_free(void* ptr);
// obj_move: asks the heap to relocate a block; code in this file treats a
// non-null result as the block's new address.
void* obj_move(void* ptr);
#include "common.h"
#include "AnimBlendSequence.h"
@@ -16,7 +22,7 @@ CAnimBlendSequence::CAnimBlendSequence(void)
CAnimBlendSequence::~CAnimBlendSequence(void)
{
if(keyFrames)
RwFree(keyFrames);
obj_free(keyFrames);
}
void
@@ -42,7 +48,8 @@ CAnimBlendSequence::SetNumFrames(int numFrames, bool translation, bool compress)
sz = sizeof(KeyFrame);
type |= KF_ROT;
}
keyFrames = RwMalloc(sz * numFrames);
keyFrames = obj_alloc(sz * numFrames, &(void*&)keyFrames);
assert(keyFrames != nullptr);
this->numFrames = numFrames;
}
@@ -64,24 +71,17 @@ CAnimBlendSequence::RemoveQuaternionFlips(void)
}
}
#ifdef USE_CUSTOM_ALLOCATOR
#if 0
bool
CAnimBlendSequence::MoveMemory(void)
{
if(keyFrames){
void *newaddr = gMainHeap.MoveMemory(keyFrames);
if(newaddr != keyFrames){
if (keyFrames) {
void* newaddr = obj_move(keyFrames);
if (newaddr) {
keyFrames = newaddr;
return true;
}
}else if(keyFramesCompressed){
void *newaddr = gMainHeap.MoveMemory(keyFramesCompressed);
if(newaddr != keyFramesCompressed){
keyFramesCompressed = newaddr;
return true;
}
}
return false;
}
#endif
#endif

View File

@@ -172,6 +172,12 @@ file_t fdPedSfx;
volatile uint32 nPedSfxReqReadId = 1;
volatile uint32 nPedSfxReqNextId = 1;
// this is very wasteful and temporary
// Size in bytes of the static staging buffer used while loading a sample bank.
// Parenthesized so the macro stays a single operand wherever it is expanded
// (e.g. `x / BANK_STAGE_SIZE` or `x % BANK_STAGE_SIZE`).
#define BANK_STAGE_SIZE (16 * 2048)
// Static, 32-byte aligned staging buffers; they replace the per-call
// memalign()/free() pairs that LoadSampleBank and LoadPedComment used before.
static uint8_t stagingBufferBank[BANK_STAGE_SIZE] __attribute__((aligned(32)));
static uint8_t stagingBufferPedSFX[PED_BLOCKSIZE_ADPCM] __attribute__((aligned(32)));
struct WavHeader {
// RIFF Header
char riff[4]; // RIFF Header Magic header
@@ -526,6 +532,7 @@ cSampleManager::SetMonoMode(uint8 nMode)
{
}
bool8
cSampleManager::LoadSampleBank(uint8 nBank)
{
@@ -555,8 +562,8 @@ cSampleManager::LoadSampleBank(uint8 nBank)
// TODO: Split per-bank sfx file
int fd = fs_open(SampleBankDataFilename, O_RDONLY);
assert(fd >= 0);
// this is very wasteful and temporary
void* stagingBuffer = memalign(32, 32 * 2048);
auto stagingBuffer = stagingBufferBank;
assert(stagingBuffer != 0);
// Ideally, we'd suspend the CdStream thingy here or read via that instead
@@ -564,7 +571,7 @@ cSampleManager::LoadSampleBank(uint8 nBank)
fs_seek(fd, fileStart, SEEK_SET);
while (fileSize > 0) {
size_t readSize = fileSize > 32 * 2048 ? 32 * 2048 : fileSize;
size_t readSize = fileSize > BANK_STAGE_SIZE ? BANK_STAGE_SIZE : fileSize;
int rs = fs_read(fd, stagingBuffer, readSize);
debugf("Read %d bytes, expected %d\n", rs, readSize);
assert(rs == readSize);
@@ -574,7 +581,6 @@ cSampleManager::LoadSampleBank(uint8 nBank)
debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize);
}
fs_close(fd);
free(stagingBuffer);
for (int nSfx = BankStartOffset[nBank]; nSfx < BankStartOffset[nBank+1]; nSfx++) {
@@ -723,7 +729,8 @@ cSampleManager::LoadPedComment(uint32 nComment)
// TODO: When we can dma directly to AICA, we can use this instead
// fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size);
void* stagingBuffer = memalign(32, cmd->size);
// TODO: Merge stagingBufferPedSFX with stagingBuffer
void* stagingBuffer = stagingBufferPedSFX;
assert(stagingBuffer != 0);
debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer);
int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size);
@@ -731,7 +738,6 @@ cSampleManager::LoadPedComment(uint32 nComment)
assert(rs == cmd->size);
spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size);
free(stagingBuffer);
nPedSfxReqReadId = nPedSfxReqReadId + 1;
});

View File

@@ -34,6 +34,9 @@ CPathInfoForObject *InfoForTilePeds;
CTempDetachedNode *DetachedNodesCars;
CTempDetachedNode *DetachedNodesPeds;
// Object-heap allocator entry points, implemented outside this file.
// This file passes nil for `storage` when allocating the InfoForTile* arrays.
void* obj_alloc(size_t size, void** storage);
void obj_free(void* ptr);
bool
CPedPath::CalcPedRoute(int8 pathType, CVector position, CVector destination, CVector *pointPoses, int16 *pointsFound, int16 maxPoints)
{
@@ -245,15 +248,22 @@ CPathFind::Init(void)
void
CPathFind::AllocatePathFindInfoMem(int16 numPathGroups)
{
delete[] InfoForTileCars;
InfoForTileCars = nil;
delete[] InfoForTilePeds;
InfoForTilePeds = nil;
assert(numPathGroups == 4500);
if (InfoForTileCars) {
obj_free(InfoForTileCars);
InfoForTileCars = nil;
}
if (InfoForTilePeds) {
obj_free(InfoForTilePeds);
InfoForTilePeds = nil;
}
// NB: MIAMI doesn't use numPathGroups here but hardcodes 4500
InfoForTileCars = new CPathInfoForObject[12*numPathGroups];
InfoForTileCars = (CPathInfoForObject*) obj_alloc(sizeof(CPathInfoForObject)*12*numPathGroups, nil);
memset(InfoForTileCars, 0, 12*numPathGroups*sizeof(CPathInfoForObject));
InfoForTilePeds = new CPathInfoForObject[12*numPathGroups];
InfoForTilePeds = (CPathInfoForObject*) obj_alloc(sizeof(CPathInfoForObject)*12*numPathGroups, nil);
memset(InfoForTilePeds, 0, 12*numPathGroups*sizeof(CPathInfoForObject));
// unused
@@ -438,10 +448,15 @@ CPathFind::PreparePathData(void)
CountFloodFillGroups(PATH_CAR);
CountFloodFillGroups(PATH_PED);
delete[] InfoForTileCars;
InfoForTileCars = nil;
delete[] InfoForTilePeds;
InfoForTilePeds = nil;
if (InfoForTileCars) {
obj_free(InfoForTileCars);
InfoForTileCars = nil;
}
if (InfoForTilePeds) {
obj_free(InfoForTilePeds);
InfoForTilePeds = nil;
}
delete[] DetachedNodesCars;
DetachedNodesCars = nil;

View File

@@ -1012,8 +1012,12 @@ void CGame::InitialiseWhenRestarting(void)
DMAudio.ChangeMusicMode(MUSICMODE_GAME);
}
bool obj_relocate();
void CGame::Process(void)
{
obj_relocate();
CPad::UpdatePads();
#ifdef USE_CUSTOM_ALLOCATOR
ProcessTidyUpMemory();

View File

@@ -1164,6 +1164,12 @@ bool re3RemoveLeastUsedModel() {
return CStreaming::RemoveLeastUsedModel();
}
// Asks the streamer to delete RW objects behind the camera, using the current
// memory usage as the argument, and reports whether ms_memoryUsed went down.
bool re3EmergencyRemoveModel() {
	const auto usedBefore = CStreaming::ms_memoryUsed;
	CStreaming::DeleteRwObjectsBehindCamera(usedBefore);
	const bool freedSomething = CStreaming::ms_memoryUsed < usedBefore;
	return freedSomething;
}
bool
CStreaming::RemoveLeastUsedModel(void)
{

View File

@@ -16,6 +16,8 @@ extern const char* currentFile;
#define texconvf(...) // printf(__VA_ARGS__)
#endif
#include "tlsf.h"
#include "../../../src/vmu/vmu.h"
#include "../rwbase.h"
#include "../rwerror.h"
@@ -39,6 +41,86 @@ extern const char* currentFile;
#define logf(...) // printf(__VA_ARGS__)
// Streaming hooks used by obj_alloc() to evict models when the object heap is full.
bool re3RemoveLeastUsedModel();
bool re3EmergencyRemoveModel();
// Maps each live relocatable block to the address of the pointer that refers
// to it, so that pointer can be patched when the block is moved.
std::map<void*, void**> relocatableAllocs;
// Backing storage for the object heap (4 MiB). A plain uint8_t array only
// guarantees 1-byte alignment, while TLSF checks the alignment of the memory
// it is given, so request a generous alignment explicitly.
alignas(32) uint8_t obj_heap[4 * 1024 * 1024];
// TLSF control structure and pool, created over obj_heap by obj_init().
tlsf_t obj_pool;
// Creates the TLSF allocator over obj_heap. Must run before any obj_alloc().
// The result is asserted, matching the asserts placed after allocations
// elsewhere in this file, so a failed pool creation is caught here rather
// than on the first allocation.
void obj_init() {
	obj_pool = tlsf_create_with_pool(obj_heap, sizeof(obj_heap));
	assert(obj_pool != nullptr);
}
// Scan cursor for obj_relocate(): the address of the most recently relocated
// block. The next scan resumes at the first tracked block above this address;
// null (0) makes it start from the lowest tracked address.
void* last_relocation;
// Forward declaration: obj_alloc() runs compaction when the heap is exhausted.
bool obj_relocate();
// Allocates `size` bytes from the object heap.
//
// storage: address of the pointer variable that will hold the returned block.
//          It is recorded so obj_relocate() can patch that pointer when the
//          block is moved. Pass null for a block that must never be moved:
//          such blocks are not registered for relocation.
//
// On exhaustion the function evicts streamed models and retries. If nothing
// can be evicted it runs a full compaction and tries one last time.
//
// Returns the block, or nullptr when even the post-compaction attempt fails.
// (Previously this case looped forever, because neither eviction nor
// compaction could change the outcome on the next iteration.)
void* obj_alloc(size_t size, void** storage) {
	auto rv = tlsf_malloc(obj_pool, size);
	while (rv == nullptr) {
		const bool evicted = re3RemoveLeastUsedModel() || re3EmergencyRemoveModel();
		if (!evicted) {
			fprintf(stderr, "obj_alloc: out of memory, doing full compaction\n");
			last_relocation = nullptr;
			while (obj_relocate())
				;
			// last chance
			rv = tlsf_malloc(obj_pool, size);
			if (rv == nullptr) {
				fprintf(stderr, "obj_alloc: out of memory, cannot allocate %lu bytes\n", (unsigned long)size);
				return nullptr;
			}
			break;
		}
		fprintf(stderr, "obj_alloc: soft out of memory\n");
		rv = tlsf_malloc(obj_pool, size);
	}
	// Only blocks with a patchable owner pointer may be relocated; registering
	// a null storage would make obj_relocate() write through a null pointer.
	if (storage != nullptr)
		relocatableAllocs[rv] = storage;
	return rv;
}
// Returns a block to the object heap. Its relocation record (if any) is
// dropped first so obj_relocate() never visits a freed block.
void obj_free(void* p) {
	const auto record = relocatableAllocs.find(p);
	if (record != relocatableAllocs.end())
		relocatableAllocs.erase(record);
	tlsf_free(obj_pool, p);
}
// Forwards to tlsf_move() on the object pool and hands back its result.
// obj_relocate() treats a null result as "block was not moved".
void* obj_move(void* p) {
	void* const relocated = tlsf_move(obj_pool, p);
	return relocated;
}
// Performs one bounded step of object-heap compaction.
//
// Starting just above last_relocation (or at the lowest tracked block when the
// cursor is null or past the end), each tracked block is offered to obj_move().
// When a block moves, the owner pointer recorded at allocation time is patched
// and the tracking entry is re-keyed to the new address.
//
// Returns true once roughly 10 KiB worth of blocks has been moved in this call,
// meaning there may be more work and the caller can call again. Returns false
// when the scan reaches the end of the tracked set; the cursor is then reset.
//
// Entries with a null owner pointer are treated as pinned and never moved,
// since there would be no pointer to patch.
bool obj_relocate() {
	// FILE* f = fopen("/pc/Users/skmp/projects/dca3-game/dreamcast/chunks-sorted-with.txt.native.tmp", "w");
	// fprintf(f, "ALLOC: %p, %d\n", (uintptr_t)obj_heap & 0xFFFFFF, sizeof(obj_heap));
	// for(auto allocation: relocatableAllocs) {
	// fprintf(f, "ALLOC: %p, %d\n", (uintptr_t&)allocation.first & 0xFFFFFF, tlsf_block_size(allocation.first));
	// }
	// fclose(f);
	// fs_unlink("/pc/Users/skmp/projects/dca3-game/dreamcast/chunks-sorted-with.txt.native");
	// fs_rename("/pc/Users/skmp/projects/dca3-game/dreamcast/chunks-sorted-with.txt.native.tmp", "/pc/Users/skmp/projects/dca3-game/dreamcast/chunks-sorted-with.txt.native");
	// fprintf(stderr, "obj_relocate: %p\n", last_relocation);
	int toRelocate = 10 * 1024; // per-call byte budget

	auto start = relocatableAllocs.upper_bound(last_relocation);
	if (start == relocatableAllocs.end())
		start = relocatableAllocs.begin();

	while (start != relocatableAllocs.end()) {
		auto old = start->first;
		auto storage = start->second;

		// No owner pointer to patch: leave the block where it is.
		if (storage == nullptr) {
			++start;
			continue;
		}

		// Must be read before the move; `old` is no longer a live block afterwards.
		auto oldSize = tlsf_block_size(old);
		auto newp = obj_move(old);
		if (newp) {
			toRelocate -= static_cast<int>(oldSize);
			*storage = newp;
			// erase() returns the next entry; inserting into a std::map does
			// not invalidate it, so the scan can continue from there.
			start = relocatableAllocs.erase(start);
			relocatableAllocs[newp] = storage;
			last_relocation = newp;
			// fprintf(stderr, "obj_relocate: %p -> %p, %d\n", old, newp, oldSize);
			if (toRelocate <= 0)
				return true;
		} else {
			++start;
		}
	}

	last_relocation = nullptr;
	return false;
}
// #include "rwdcimpl.h"
@@ -4382,6 +4464,7 @@ ObjPipeline* makeDefaultPipeline(void)
static void*
driverOpen(void *o, int32, int32)
{
obj_init();
pvr_init(&pvr_params);
fake_tex = pvr_mem_malloc(sizeof(fake_tex_data));
@@ -4603,7 +4686,7 @@ void*
destroyNativeData(void *object, int32, int32)
{
auto geo = (Geometry*)object;
rwFree(geo->instData);
obj_free(geo->instData);
geo->instData = nil;
return object;
@@ -4620,7 +4703,9 @@ readNativeData(Stream *stream, int32 length, void *object, int32, int32)
return nil;
}
DCModelDataHeader *header = (DCModelDataHeader *)rwNew(sizeof(DCModelDataHeader) + chunkLen - 8, MEMDUR_EVENT | ID_GEOMETRY);
DCModelDataHeader *header = (DCModelDataHeader *)obj_alloc(sizeof(DCModelDataHeader) + chunkLen - 8, &(void*&)geo->instData);
assert(header != nullptr);
geo->instData = header;
stream->read32(&header->platform, 4);
uint32_t version;

92
vendor/tlsf/README.md vendored Normal file
View File

@@ -0,0 +1,92 @@
# tlsf
Two-Level Segregated Fit memory allocator implementation.
Written by Matthew Conte (matt@baisoku.org).
Released under the BSD license.
Features
--------
* O(1) cost for malloc, free, realloc, memalign
* Extremely low overhead per allocation (4 bytes)
* Low overhead per TLSF management of pools (~3kB)
* Low fragmentation
* Compiles to only a few kB of code and data
* Support for adding and removing memory pool regions on the fly
Caveats
-------
* Currently, assumes architecture can make 4-byte aligned accesses
* Not designed to be thread safe; the user must provide this
Notes
-----
This code was based on the TLSF 1.4 spec and documentation found at:
http://www.gii.upv.es/tlsf/main/docs
It also leverages the TLSF 2.0 improvement to shrink the per-block overhead from 8 to 4 bytes.
History
-------
2016/04/10 - v3.1
* Code moved to github
* tlsfbits.h rolled into tlsf.c
* License changed to BSD
2014/02/08 - v3.0
* This version is based on improvements from 3DInteractive GmbH
* Interface changed to allow more than one memory pool
* Separated pool handling from control structure (adding, removing, debugging)
* Control structure and pools can still be constructed in the same memory block
* Memory blocks for control structure and pools are checked for alignment
* Added functions to retrieve control structure size, alignment size, min and max block size, overhead of pool structure, and overhead of a single allocation
* Minimal Pool size is tlsf_block_size_min() + tlsf_pool_overhead()
* Pool must be empty when it is removed, in order to allow O(1) removal
2011/10/20 - v2.0
* 64-bit support
* More compiler intrinsics for ffs/fls
* ffs/fls verification during TLSF creation in debug builds
2008/04/04 - v1.9
* Add tlsf_heap_check, a heap integrity check
* Support a predefined tlsf_assert macro
* Fix realloc case where block should shrink; if adjacent block is in use, execution would go down the slow path
2007/02/08 - v1.8
* Fix for unnecessary reallocation in tlsf_realloc
2007/02/03 - v1.7
* tlsf_heap_walk takes a callback
* tlsf_realloc now returns NULL on failure
* tlsf_memalign optimization for 4-byte alignment
* Usage of size_t where appropriate
2006/11/21 - v1.6
* ffs/fls broken out into tlsfbits.h
* tlsf_overhead queries per-pool overhead
2006/11/07 - v1.5
* Smart realloc implementation
* Smart memalign implementation
2006/10/11 - v1.4
* Add some ffs/fls implementations
* Minor code footprint reduction
2006/09/14 - v1.3
* Profiling indicates heavy use of blocks of size 1-128, so implement small block handling
* Reduce pool overhead by about 1kb
* Reduce minimum block size from 32 to 12 bytes
* Realloc bug fix
2006/09/09 - v1.2
* Add tlsf_block_size
* Static assertion mechanism for invariants
* Minor bugfixes
2006/09/01 - v1.1
* Add tlsf_realloc
* Add tlsf_walk_heap
2006/08/25 - v1.0
* First release

1295
vendor/tlsf/tlsf.c vendored Normal file

File diff suppressed because it is too large Load Diff

91
vendor/tlsf/tlsf.h vendored Normal file
View File

@@ -0,0 +1,91 @@
#ifndef INCLUDED_tlsf
#define INCLUDED_tlsf
/*
** Two Level Segregated Fit memory allocator, version 3.1.
** Written by Matthew Conte
** http://tlsf.baisoku.org
**
** Based on the original documentation by Miguel Masmano:
** http://www.gii.upv.es/tlsf/main/docs
**
** This implementation was written to the specification
** of the document, therefore no GPL restrictions apply.
**
** Copyright (c) 2006-2016, Matthew Conte
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions are met:
** * Redistributions of source code must retain the above copyright
** notice, this list of conditions and the following disclaimer.
** * Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in the
** documentation and/or other materials provided with the distribution.
** * Neither the name of the copyright holder nor the
** names of its contributors may be used to endorse or promote products
** derived from this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
** DISCLAIMED. IN NO EVENT SHALL MATTHEW CONTE BE LIABLE FOR ANY
** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stddef.h>
#if defined(__cplusplus)
extern "C" {
#endif
/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */
/* pool_t: a block of memory that TLSF can manage. */
typedef void* tlsf_t;
typedef void* pool_t;
/* Create/destroy a memory pool. */
tlsf_t tlsf_create(void* mem);
tlsf_t tlsf_create_with_pool(void* mem, size_t bytes);
void tlsf_destroy(tlsf_t tlsf);
pool_t tlsf_get_pool(tlsf_t tlsf);
/* Add/remove memory pools. */
pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes);
void tlsf_remove_pool(tlsf_t tlsf, pool_t pool);
/* malloc/memalign/realloc/free replacements. */
void* tlsf_malloc(tlsf_t tlsf, size_t bytes);
/*
** tlsf_move: relocation entry point. It is not listed in the README history,
** so it is presumably a project-local addition (TODO confirm against tlsf.c).
** Callers in this project treat a non-NULL return as the block's new address
** and a NULL return as "the block was not moved".
*/
void* tlsf_move(tlsf_t tlsf, void* ptr);
void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t bytes);
void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size);
void tlsf_free(tlsf_t tlsf, void* ptr);
/* Returns internal block size, not original request size */
size_t tlsf_block_size(void* ptr);
/* Overheads/limits of internal structures. */
size_t tlsf_size(void);
size_t tlsf_align_size(void);
size_t tlsf_block_size_min(void);
size_t tlsf_block_size_max(void);
size_t tlsf_pool_overhead(void);
size_t tlsf_alloc_overhead(void);
/* Debugging. */
typedef void (*tlsf_walker)(void* ptr, size_t size, int used, void* user);
void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user);
/* Returns nonzero if any internal consistency check fails. */
int tlsf_check(tlsf_t tlsf);
int tlsf_check_pool(pool_t pool);
#if defined(__cplusplus)
};
#endif
#endif