Compare commits
20 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fa84fefa06 | ||
|
|
0b7892665e | ||
|
|
fa563ee805 | ||
|
|
81fa320f1e | ||
|
|
449be62e98 | ||
|
|
e4814f3e51 | ||
|
|
edeee9fd4f | ||
|
|
15fdaecfc7 | ||
|
|
5316a3572e | ||
|
|
1f2f270da9 | ||
|
|
1f4dace511 | ||
|
|
fc4a7e3791 | ||
|
|
04b11dfb9d | ||
|
|
2716147db4 | ||
|
|
dc96ffc551 | ||
|
|
4d13e821b5 | ||
|
|
cca7b3c6fa | ||
|
|
dbcc46b774 | ||
|
|
92be9cfbdd | ||
|
|
7bb5e1640e |
@@ -381,6 +381,7 @@ INCLUDE = \
|
||||
-I../src/liberty/skel/win \
|
||||
\
|
||||
-I../vendor/librw \
|
||||
-I../vendor/librw/src/dc \
|
||||
\
|
||||
-I../vendor/miniLZO \
|
||||
\
|
||||
|
||||
@@ -396,6 +396,7 @@ INCLUDE = \
|
||||
-I../src/miami/skel/win \
|
||||
\
|
||||
-I../vendor/librw \
|
||||
-I../vendor/librw/src/dc \
|
||||
\
|
||||
-I../vendor/miniLZO \
|
||||
\
|
||||
|
||||
@@ -94,23 +94,26 @@ void VmuProfiler::run() {
|
||||
pvr_stats_t pvrStats; pvr_get_stats(&pvrStats);
|
||||
uint32_t sramStats = snd_mem_available();
|
||||
size_t pvrAvail = pvr_mem_available();
|
||||
float fps = std::accumulate(std::begin(fps_), std::end(fps_), 0.0f)
|
||||
/ static_cast<float>(fpsSamples);
|
||||
|
||||
float sh4Mem = heapUtilization();
|
||||
float pvrMem = (8_MB - pvrAvail ) / 8_MB * 100.0f;
|
||||
float armMem = (2_MB - sramStats) / 2_MB * 100.0f;
|
||||
float vtxBuf = vertBuffUse_;
|
||||
{
|
||||
std::shared_lock lk(mtx_);
|
||||
|
||||
vmu_printf("FPS :%6.2f\n"
|
||||
"SH4 :%6.2f%%\n"
|
||||
"PVR :%6.2f%%\n"
|
||||
"ARM :%6.2f%%\n"
|
||||
"VTX :%6.2f%%",
|
||||
fps, sh4Mem, pvrMem, armMem, vtxBuf);
|
||||
float vtxBuf;
|
||||
float fps;
|
||||
{ /* Critical section with main thread. */
|
||||
std::shared_lock lk(mtx_);
|
||||
vtxBuf = vertBuffUse_;
|
||||
fps = std::accumulate(std::begin(fps_), std::end(fps_), 0.0f)
|
||||
/ static_cast<float>(fpsSamples);
|
||||
}
|
||||
|
||||
vmu_printf(" FPS:%6.2f\n"
|
||||
" RAM:%6.2f%%\n"
|
||||
"VRAM:%6.2f%%\n"
|
||||
"SRAM:%6.2f%%\n"
|
||||
" VTX:%6.2f%%",
|
||||
fps, sh4Mem, pvrMem, armMem, vtxBuf);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -119,15 +122,21 @@ void VmuProfiler::run() {
|
||||
}
|
||||
|
||||
void VmuProfiler::updateVertexBufferUsage() {
|
||||
#ifndef DC_SH4
|
||||
std::unique_lock lk(mtx_);
|
||||
updated_ = true;
|
||||
|
||||
#ifdef DC_SH4
|
||||
vertBuffUse_ = vertexBufferUtilization();
|
||||
#else
|
||||
pvr_stats_t pvrStats;
|
||||
pvr_get_stats(&pvrStats);
|
||||
float vtxUtil = vertexBufferUtilization();
|
||||
|
||||
pvr_stats_t pvrStats;
|
||||
pvr_get_stats(&pvrStats);
|
||||
fps_[fpsFrame_++] = pvrStats.frame_rate;
|
||||
{ /* Critical section with VMU thread. */
|
||||
std::unique_lock lk(mtx_);
|
||||
vertBuffUse_ = vtxUtil;
|
||||
updated_ = true;
|
||||
fps_[fpsFrame_++] = pvrStats.frame_rate;
|
||||
}
|
||||
|
||||
if(fpsFrame_ >= fpsSamples)
|
||||
fpsFrame_ = 0;
|
||||
|
||||
@@ -1619,7 +1619,7 @@ CCollision::ProcessLineOfSight(const CColLine &line,
|
||||
point.point = matrix * point.point;
|
||||
point.normal = Multiply3x3(matrix, point.normal);
|
||||
#else
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrix)));
|
||||
dc::mat_load2(matrix);
|
||||
mat_trans_single3_nodiv(point.point.x,
|
||||
point.point.y,
|
||||
point.point.z);
|
||||
@@ -1798,7 +1798,7 @@ CCollision::ProcessVerticalLine(const CColLine &line,
|
||||
point.point = matrix * point.point;
|
||||
point.normal = Multiply3x3(matrix, point.normal);
|
||||
#else
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrix)));
|
||||
dc::mat_load2(matrix);
|
||||
mat_trans_single3_nodiv(point.point.x,
|
||||
point.point.y,
|
||||
point.point.z);
|
||||
@@ -2173,8 +2173,7 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
|
||||
#ifndef DC_SH4
|
||||
matAB *= matrixA;
|
||||
#else
|
||||
mat_load(reinterpret_cast<const matrix_t*>(&matAB));
|
||||
mat_apply(reinterpret_cast<const matrix_t*>(&matrixA));
|
||||
dc::mat_load_apply(matAB, matrixA);
|
||||
#endif
|
||||
|
||||
CColSphere bsphereAB; // bounding sphere of A in B space
|
||||
@@ -2246,8 +2245,7 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
|
||||
#ifndef DC_SH4
|
||||
matBA *= matrixB;
|
||||
#else
|
||||
mat_load(reinterpret_cast<const matrix_t*>(&matBA));
|
||||
mat_apply(reinterpret_cast<const matrix_t*>(&matrixB));
|
||||
dc::mat_load_apply(matBA, matrixB);
|
||||
#endif
|
||||
for(i = 0; i < modelB.numSpheres; i++){
|
||||
s.radius = modelB.spheres[i].radius;
|
||||
@@ -2309,7 +2307,7 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
|
||||
}
|
||||
|
||||
#ifdef DC_SH4
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrixB)));
|
||||
dc::mat_load2(matrixB);
|
||||
#endif
|
||||
for(i = 0; i < numCollisions; i++){
|
||||
#ifndef DC_SH4
|
||||
|
||||
@@ -82,8 +82,6 @@
|
||||
|
||||
#define rwVENDORID_ROCKSTAR 0x0253F2
|
||||
|
||||
__always_inline auto Max(auto a, auto b) { return ((a > b)? a : b); }
|
||||
__always_inline auto Min(auto a, auto b) { return ((a < b)? a : b); }
|
||||
|
||||
// Use this to add const that wasn't there in the original code
|
||||
#define Const const
|
||||
@@ -299,15 +297,6 @@ extern int strcasecmp(const char *str1, const char *str2);
|
||||
|
||||
extern wchar *AllocUnicode(const char*src);
|
||||
|
||||
template<typename T>
|
||||
__always_inline T Clamp(T v, auto low, auto high) {
|
||||
return std::clamp(v, static_cast<T>(low), static_cast<T>(high));
|
||||
}
|
||||
|
||||
__always_inline auto Clamp2(auto v, auto center, auto radius) {
|
||||
return (v > center) ? Min(v, center + radius) : Max(v, center - radius);
|
||||
}
|
||||
|
||||
#define SQR(x) ((x) * (x))
|
||||
__always_inline auto sq(auto x) { return SQR(x); }
|
||||
|
||||
@@ -418,15 +407,7 @@ template<int s, int t> struct check_size {
|
||||
#endif
|
||||
#define BIT(num) (1<<(num))
|
||||
|
||||
#define ABS(a) std::abs(a)
|
||||
#define ABS(a) Abs(a)
|
||||
|
||||
__always_inline auto norm(auto value, auto min, auto max) {
|
||||
return (Clamp(value, min, max) - min) / (max - min);
|
||||
}
|
||||
// we use std::lerp now
|
||||
//#define lerp(norm, min, max) ( (norm) * ((max) - (min)) + (min) )
|
||||
|
||||
#define STRINGIFY(x) #x
|
||||
#define STR(x) STRINGIFY(x)
|
||||
#define CONCAT_(x,y) x##y
|
||||
#define CONCAT(x,y) CONCAT_(x,y)
|
||||
|
||||
@@ -449,8 +449,8 @@ RwBool RwIm3DRenderPrimitive(RwPrimitiveType primType);
|
||||
|
||||
RwBool RwRenderStateGet(RwRenderState state, void *value)
|
||||
{
|
||||
uint32 *uival = (uint32*)value;
|
||||
uint32 fog;
|
||||
uint32_t *uival = (uint32_t*)value;
|
||||
uint32_t fog;
|
||||
switch(state){
|
||||
case rwRENDERSTATETEXTURERASTER: *(void**)value = GetRenderStatePtr(TEXTURERASTER); return true;
|
||||
case rwRENDERSTATETEXTUREADDRESS: *uival = GetRenderState(TEXTUREADDRESS); return true;
|
||||
@@ -496,8 +496,8 @@ RwBool RwRenderStateGet(RwRenderState state, void *value)
|
||||
}
|
||||
RwBool RwRenderStateSet(RwRenderState state, void *value)
|
||||
{
|
||||
uint32 uival = (uintptr)value;
|
||||
uint32 fog;
|
||||
uint32_t uival = (uintptr_t)value;
|
||||
uint32_t fog;
|
||||
switch(state){
|
||||
case rwRENDERSTATETEXTURERASTER: SetRenderStatePtr(TEXTURERASTER, value); return true;
|
||||
case rwRENDERSTATETEXTUREADDRESS: SetRenderState(TEXTUREADDRESS, uival); return true;
|
||||
@@ -544,8 +544,8 @@ RwBool RwRenderStateSet(RwRenderState state, void *value)
|
||||
static rw::MemoryFunctions gMemfuncs;
|
||||
static void *(*real_malloc)(size_t size);
|
||||
static void *(*real_realloc)(void *mem, size_t newSize);
|
||||
static void *mallocWrap(size_t sz, uint32 hint) { if(sz == 0) return nil; return real_malloc(sz); }
|
||||
static void *reallocWrap(void *p, size_t sz, uint32 hint) { return real_realloc(p, sz); }
|
||||
static void *mallocWrap(size_t sz, uint32_t hint) { if(sz == 0) return nil; return real_malloc(sz); }
|
||||
static void *reallocWrap(void *p, size_t sz, uint32_t hint) { return real_realloc(p, sz); }
|
||||
|
||||
|
||||
// WARNING: unused parameters
|
||||
@@ -830,7 +830,7 @@ RwCamera *RwCameraForAllClumpsNotInFrustum(RwCamera *camera, RwInt32 numClumps,
|
||||
RwBool RpMatFXPluginAttach( void ) { registerMatFXPlugin(); return true; }
|
||||
RpAtomic *RpMatFXAtomicEnableEffects( RpAtomic *atomic ) { MatFX::enableEffects(atomic); return atomic; }
|
||||
RpMatFXMaterialFlags RpMatFXMaterialGetEffects( const RpMaterial *material ){ return (RpMatFXMaterialFlags)MatFX::getEffects(material); }
|
||||
RpMaterial *RpMatFXMaterialSetEffects( RpMaterial *material, RpMatFXMaterialFlags flags ) { MatFX::setEffects(material, (uint32)flags); return material; }
|
||||
RpMaterial *RpMatFXMaterialSetEffects( RpMaterial *material, RpMatFXMaterialFlags flags ) { MatFX::setEffects(material, (uint32_t)flags); return material; }
|
||||
RpMaterial *RpMatFXMaterialSetupEnvMap( RpMaterial *material, RwTexture *texture, RwFrame *frame, RwBool useFrameBufferAlpha, RwReal coef ) {
|
||||
MatFX *mfx = MatFX::get(material);
|
||||
mfx->setEnvTexture(texture);
|
||||
|
||||
@@ -4,7 +4,7 @@ CMatrix::CMatrix(CMatrix const &m)
|
||||
{
|
||||
m_attachment = nil;
|
||||
m_hasRwMatrix = false;
|
||||
*this = m;
|
||||
mat_copy(*this, m);
|
||||
}
|
||||
|
||||
CMatrix::CMatrix(RwMatrix *matrix, bool owner)
|
||||
@@ -75,7 +75,7 @@ CMatrix::UpdateRW(void)
|
||||
void
|
||||
CMatrix::operator=(CMatrix const &rhs)
|
||||
{
|
||||
memcpy(this, &rhs, sizeof(f));
|
||||
mat_copy(*this, rhs);
|
||||
if (m_attachment)
|
||||
UpdateRW();
|
||||
}
|
||||
@@ -83,7 +83,7 @@ CMatrix::operator=(CMatrix const &rhs)
|
||||
void
|
||||
CMatrix::CopyOnlyMatrix(const CMatrix &other)
|
||||
{
|
||||
memcpy(this, &other, sizeof(f));
|
||||
mat_copy(*this, other);
|
||||
}
|
||||
|
||||
CMatrix &
|
||||
@@ -277,9 +277,9 @@ void
|
||||
CMatrix::RotateX(float x)
|
||||
{
|
||||
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
|
||||
mat_load(reinterpret_cast<matrix_t *>(this));
|
||||
mat_rotate_x(x);
|
||||
mat_store(reinterpret_cast<matrix_t *>(this));
|
||||
dc::mat_load2(reinterpret_cast<matrix_t *>(this));
|
||||
mat_rotate_x(x);
|
||||
dc::mat_store2(reinterpret_cast<matrix_t *>(this));
|
||||
#else
|
||||
auto [s, c] = SinCos(x);
|
||||
|
||||
@@ -307,9 +307,9 @@ void
|
||||
CMatrix::RotateY(float y)
|
||||
{
|
||||
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
|
||||
mat_load(reinterpret_cast<matrix_t *>(this));
|
||||
mat_rotate_y(y);
|
||||
mat_store(reinterpret_cast<matrix_t *>(this));
|
||||
dc::mat_load2(reinterpret_cast<matrix_t *>(this));
|
||||
mat_rotate_y(y);
|
||||
dc::mat_store2(reinterpret_cast<matrix_t *>(this));
|
||||
#else
|
||||
auto [s, c] = SinCos(y);
|
||||
|
||||
@@ -337,9 +337,9 @@ void
|
||||
CMatrix::RotateZ(float z)
|
||||
{
|
||||
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
|
||||
mat_load(reinterpret_cast<matrix_t *>(this));
|
||||
mat_rotate_z(z);
|
||||
mat_store(reinterpret_cast<matrix_t *>(this));
|
||||
dc::mat_load2(reinterpret_cast<matrix_t *>(this));
|
||||
mat_rotate_z(z);
|
||||
dc::mat_store2(reinterpret_cast<matrix_t *>(this));
|
||||
#else
|
||||
auto [s, c] = SinCos(z);
|
||||
|
||||
@@ -367,9 +367,9 @@ void
|
||||
CMatrix::Rotate(float x, float y, float z)
|
||||
{
|
||||
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
|
||||
mat_load(reinterpret_cast<matrix_t *>(this));
|
||||
mat_rotate(x, y, z);
|
||||
mat_store(reinterpret_cast<matrix_t *>(this));
|
||||
dc::mat_load2(reinterpret_cast<matrix_t *>(this));
|
||||
mat_rotate(x, y, z);
|
||||
dc::mat_store2(reinterpret_cast<matrix_t *>(this));
|
||||
#else
|
||||
auto [sX, cX] = SinCos(x);
|
||||
auto [sY, cY] = SinCos(y);
|
||||
@@ -449,65 +449,13 @@ CMatrix::Reorthogonalise(void)
|
||||
f = CrossProduct(u, r);
|
||||
}
|
||||
|
||||
#ifdef DC_SH4
|
||||
static __always_inline void MATH_Load_Matrix_Product(const matrix_t* matrix1, const matrix_t* matrix2)
|
||||
{
|
||||
unsigned int prefetch_scratch;
|
||||
|
||||
asm volatile (
|
||||
"mov %[bmtrx], %[pref_scratch]\n\t" // (MT)
|
||||
"add #32, %[pref_scratch]\n\t" // offset by 32 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
|
||||
"fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) (FE)
|
||||
"pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
|
||||
// back matrix
|
||||
"fmov.d @%[bmtrx]+, XD0\n\t" // (LS)
|
||||
"fmov.d @%[bmtrx]+, XD2\n\t"
|
||||
"fmov.d @%[bmtrx]+, XD4\n\t"
|
||||
"fmov.d @%[bmtrx]+, XD6\n\t"
|
||||
"pref @%[fmtrx]\n\t" // prefetch fmtrx now while we wait (LS)
|
||||
"fmov.d @%[bmtrx]+, XD8\n\t" // bmtrx prefetch should work for here
|
||||
"fmov.d @%[bmtrx]+, XD10\n\t"
|
||||
"fmov.d @%[bmtrx]+, XD12\n\t"
|
||||
"mov %[fmtrx], %[pref_scratch]\n\t" // (MT)
|
||||
"add #32, %[pref_scratch]\n\t" // store offset by 32 in r0 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
|
||||
"fmov.d @%[bmtrx], XD14\n\t"
|
||||
"pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
|
||||
// front matrix
|
||||
// interleave loads and matrix multiply 4x4
|
||||
"fmov.d @%[fmtrx]+, DR0\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR2\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR4\n\t" // (LS) want to issue the next one before 'ftrv' for parallel exec
|
||||
"ftrv XMTRX, FV0\n\t" // (FE)
|
||||
|
||||
"fmov.d @%[fmtrx]+, DR6\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR8\n\t"
|
||||
"ftrv XMTRX, FV4\n\t"
|
||||
|
||||
"fmov.d @%[fmtrx]+, DR10\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR12\n\t"
|
||||
"ftrv XMTRX, FV8\n\t"
|
||||
|
||||
"fmov.d @%[fmtrx], DR14\n\t" // (LS, but this will stall 'ftrv' for 3 cycles)
|
||||
"fschg\n\t" // switch back to single moves (and avoid stalling 'ftrv') (FE)
|
||||
"ftrv XMTRX, FV12\n\t" // (FE)
|
||||
// Save output in XF regs
|
||||
"frchg\n"
|
||||
: [bmtrx] "+&r" ((unsigned int)matrix1), [fmtrx] "+r" ((unsigned int)matrix2), [pref_scratch] "=&r" (prefetch_scratch) // outputs, "+" means r/w, "&" means it's written to before all inputs are consumed
|
||||
: // no inputs
|
||||
: "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15" // clobbers (GCC doesn't know about back bank, so writing to it isn't clobbered)
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
CMatrix
|
||||
operator*(const CMatrix &m1, const CMatrix &m2)
|
||||
{
|
||||
// TODO: VU0 code
|
||||
CMatrix out;
|
||||
#if defined(RW_DC)
|
||||
mat_load(reinterpret_cast<const matrix_t *>(&m1));
|
||||
mat_apply(reinterpret_cast<const matrix_t *>(&m2));
|
||||
mat_store(reinterpret_cast<matrix_t *>(&out));
|
||||
#ifdef DC_SH4
|
||||
mat_mult(out, m1, m2);
|
||||
#else
|
||||
out.rx = m1.rx * m2.rx + m1.fx * m2.ry + m1.ux * m2.rz;
|
||||
out.ry = m1.ry * m2.rx + m1.fy * m2.ry + m1.uy * m2.rz;
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "rwdc_common.h"
|
||||
|
||||
class alignas(8) CMatrix
|
||||
{
|
||||
public:
|
||||
@@ -27,6 +29,8 @@ public:
|
||||
SetScale(scale);
|
||||
}
|
||||
~CMatrix(void);
|
||||
operator matrix_t *() { return reinterpret_cast<matrix_t *>(this); }
|
||||
operator const matrix_t *() const { return reinterpret_cast<const matrix_t *>(this); }
|
||||
void Attach(RwMatrix *matrix, bool owner = false);
|
||||
void AttachRW(RwMatrix *matrix, bool owner = false);
|
||||
void Detach(void);
|
||||
@@ -102,15 +106,24 @@ CMatrix Invert(const CMatrix &matrix);
|
||||
CMatrix operator*(const CMatrix &m1, const CMatrix &m2);
|
||||
inline CVector MultiplyInverse(const CMatrix &mat, const CVector &vec)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
CVector v(vec.x - mat.px, vec.y - mat.py, vec.z - mat.pz);
|
||||
return CVector(
|
||||
mat.rx * v.x + mat.ry * v.y + mat.rz * v.z,
|
||||
mat.fx * v.x + mat.fy * v.y + mat.fz * v.z,
|
||||
mat.ux * v.x + mat.uy * v.y + mat.uz * v.z);
|
||||
#else
|
||||
register float x asm(KOS_FPARG(0)) = vec.x - mat.px;
|
||||
register float y asm(KOS_FPARG(1)) = vec.y - mat.py;
|
||||
register float z asm(KOS_FPARG(2)) = vec.z - mat.pz;
|
||||
return CVector(
|
||||
fipr(x, y, z, 0.0f, mat.rx, mat.ry, mat.rz, 0.0f),
|
||||
fipr(x, y, z, 0.0f, mat.fx, mat.fy, mat.fz, 0.0f),
|
||||
fipr(x, y, z, 0.0f, mat.ux, mat.uy, mat.uz, 0.0f)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
class CCompressedMatrixNotAligned
|
||||
{
|
||||
CVector m_vecPos;
|
||||
|
||||
@@ -39,10 +39,14 @@ CQuaternion::Slerp(const CQuaternion &q1, const CQuaternion &q2, float theta, fl
|
||||
void
|
||||
CQuaternion::Multiply(const CQuaternion &q1, const CQuaternion &q2)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
|
||||
y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
|
||||
z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
|
||||
w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
|
||||
#else
|
||||
quat_mult(*this, q1, q2);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
@@ -51,9 +55,16 @@ CQuaternion::Get(RwV3d *axis, float *angle)
|
||||
*angle = Acos(w);
|
||||
float s = Sin(*angle);
|
||||
|
||||
#ifndef DC_SH4
|
||||
axis->x = x * (1.0f / s);
|
||||
axis->y = y * (1.0f / s);
|
||||
axis->z = z * (1.0f / s);
|
||||
#else
|
||||
float invS = dc::Invert<true, false>(s);
|
||||
axis->x = x * invS;
|
||||
axis->y = y * invS;
|
||||
axis->z = z * invS;
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
@@ -104,7 +115,7 @@ CQuaternion::Set(const RwMatrix &matrix)
|
||||
if (f >= 0.0f) {
|
||||
s = Sqrt(f + 1.0f);
|
||||
w = 0.5f * s;
|
||||
m = 0.5f / s;
|
||||
m = Div<true, false>(0.5f, s);
|
||||
x = (matrix.up.z - matrix.at.y) * m;
|
||||
y = (matrix.at.x - matrix.right.z) * m;
|
||||
z = (matrix.right.y - matrix.up.x) * m;
|
||||
@@ -115,7 +126,7 @@ CQuaternion::Set(const RwMatrix &matrix)
|
||||
if (f >= 0.0f) {
|
||||
s = Sqrt(f + 1.0f);
|
||||
x = 0.5f * s;
|
||||
m = 0.5f / s;
|
||||
m = Div<true, false>(0.5f, s);
|
||||
y = (matrix.up.x + matrix.right.y) * m;
|
||||
z = (matrix.at.x + matrix.right.z) * m;
|
||||
w = (matrix.up.z - matrix.at.y) * m;
|
||||
@@ -126,7 +137,7 @@ CQuaternion::Set(const RwMatrix &matrix)
|
||||
if (f >= 0.0f) {
|
||||
s = Sqrt(f + 1.0f);
|
||||
y = 0.5f * s;
|
||||
m = 0.5f / s;
|
||||
m = Div<true, false>(0.5f, s);
|
||||
w = (matrix.at.x - matrix.right.z) * m;
|
||||
x = (matrix.up.x - matrix.right.y) * m;
|
||||
z = (matrix.at.y + matrix.up.z) * m;
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include "src/common_defines.h"
|
||||
#include "rwdc_common.h"
|
||||
|
||||
// TODO: actually implement this
|
||||
class CQuaternion
|
||||
{
|
||||
@@ -8,13 +11,23 @@ public:
|
||||
CQuaternion(void) {}
|
||||
CQuaternion(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
|
||||
|
||||
float Magnitude(void) const { return Sqrt(MagnitudeSqr()); }
|
||||
operator quaternion_t *() { return reinterpret_cast<quaternion_t *>(this); }
|
||||
operator const quaternion_t *() const { return reinterpret_cast<const quaternion_t *>(this); }
|
||||
operator quaternion_t &() { return *reinterpret_cast<quaternion_t *>(this); }
|
||||
operator const quaternion_t &() const { return *reinterpret_cast<const quaternion_t *>(this); }
|
||||
float Magnitude(void) const {
|
||||
#ifndef DC_SH4
|
||||
return Sqrt(x*x + y*y + z*z + w*w);
|
||||
#else
|
||||
return Sqrt(fipr_magnitude_sqr(x, y, z, w));
|
||||
#endif
|
||||
}
|
||||
float MagnitudeSqr(void) const {
|
||||
#ifdef DC_SH4
|
||||
return fipr_magnitude_sqr(x, y, z, w);
|
||||
#else
|
||||
#ifndef DC_SH4
|
||||
return x*x + y*y + z*z + w*w;
|
||||
#endif
|
||||
#else
|
||||
return fipr_magnitude_sqr(x, y, z, w);
|
||||
#endif
|
||||
}
|
||||
void Normalise(void);
|
||||
void Multiply(const CQuaternion &q1, const CQuaternion &q2);
|
||||
@@ -49,10 +62,11 @@ public:
|
||||
}
|
||||
|
||||
const CQuaternion &operator/=(float right) {
|
||||
x /= right;
|
||||
y /= right;
|
||||
z /= right;
|
||||
w /= right;
|
||||
right = dc::Invert<false>(right);
|
||||
x *= right;
|
||||
y *= right;
|
||||
z *= right;
|
||||
w *= right;
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -72,7 +86,11 @@ public:
|
||||
inline float
|
||||
DotProduct(const CQuaternion &q1, const CQuaternion &q2)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
return q1.x*q2.x + q1.y*q2.y + q1.z*q2.z + q1.w*q2.w;
|
||||
#else
|
||||
return fipr(q1.x, q1.y, q1.z, q1.w, q2.x, q2.y, q2.z, q2.w);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline CQuaternion operator+(const CQuaternion &left, const CQuaternion &right)
|
||||
@@ -97,5 +115,6 @@ inline CQuaternion operator*(float left, const CQuaternion &right)
|
||||
|
||||
inline CQuaternion operator/(const CQuaternion &left, float right)
|
||||
{
|
||||
return CQuaternion(left.x / right, left.y / right, left.z / right, left.w / right);
|
||||
right = Invert<false>(right);
|
||||
return CQuaternion(left.x * right, left.y * right, left.z * right, left.w * right);
|
||||
}
|
||||
|
||||
@@ -27,32 +27,35 @@ CrossProduct(const CVector &v1, const CVector &v2)
|
||||
CVector
|
||||
Multiply3x3(const CMatrix &mat, const CVector &vec)
|
||||
{
|
||||
#ifdef DC_SH4
|
||||
register float __x __asm__("fr12") = vec.x;
|
||||
register float __y __asm__("fr13") = vec.y;
|
||||
register float __z __asm__("fr14") = vec.z;
|
||||
register float __w __asm__("fr15") = 0.0f;
|
||||
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
|
||||
|
||||
asm volatile( "ftrv xmtrx, fv12\n"
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w)
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) );
|
||||
return { __x, __y, __z };
|
||||
#else
|
||||
#ifndef DC_SH4
|
||||
// TODO: VU0 code
|
||||
return CVector(mat.rx * vec.x + mat.fx * vec.y + mat.ux * vec.z,
|
||||
mat.ry * vec.x + mat.fy * vec.y + mat.uy * vec.z,
|
||||
mat.rz * vec.x + mat.fz * vec.y + mat.uz * vec.z);
|
||||
#else
|
||||
CVector out;
|
||||
dc::mat_load2(mat);
|
||||
mat_trans_normal3_nomod(vec.x, vec.y, vec.z,
|
||||
out.x, out.y, out.z);
|
||||
return out;
|
||||
#endif
|
||||
}
|
||||
|
||||
CVector
|
||||
Multiply3x3(const CVector &vec, const CMatrix &mat)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
return CVector(mat.rx * vec.x + mat.ry * vec.y + mat.rz * vec.z,
|
||||
mat.fx * vec.x + mat.fy * vec.y + mat.fz * vec.z,
|
||||
mat.ux * vec.x + mat.uy * vec.y + mat.uz * vec.z);
|
||||
#else
|
||||
CVector out;
|
||||
dc::mat_load2(mat);
|
||||
mat_transpose();
|
||||
mat_trans_normal3_nomod(vec.x, vec.y, vec.z,
|
||||
out.x, out.y, out.z);
|
||||
return out;
|
||||
#endif
|
||||
}
|
||||
|
||||
CVector
|
||||
@@ -60,7 +63,7 @@ operator*(const CMatrix &mat, const CVector &vec)
|
||||
{
|
||||
#ifdef DC_SH4
|
||||
CVector out;
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
|
||||
dc::mat_load2(mat);
|
||||
mat_trans_single3_nodiv_nomod(vec.x, vec.y, vec.z, out.x, out.y, out.z);
|
||||
return out;
|
||||
#else
|
||||
|
||||
@@ -19,22 +19,22 @@ public:
|
||||
}
|
||||
// (0,1,0) means no rotation. So get right vector and its atan
|
||||
__always_inline float Heading(void) const { return Atan2(-x, y); }
|
||||
__always_inline float Magnitude(void) const {
|
||||
__always_inline float Magnitude(void) const {
|
||||
#ifdef DC_SH4
|
||||
float w;
|
||||
vec3f_length(x, y, z, w);
|
||||
return w;
|
||||
#else
|
||||
return Sqrt(x*x + y*y + z*z);
|
||||
return Sqrt(x*x + y*y + z*z);
|
||||
#endif
|
||||
}
|
||||
__always_inline float MagnitudeSqr(void) const {
|
||||
__always_inline float MagnitudeSqr(void) const {
|
||||
#ifdef DC_SH4
|
||||
return fipr_magnitude_sqr(x, y,z, 0.0f);
|
||||
return fipr_magnitude_sqr(x, y,z, 0.0f);
|
||||
#else
|
||||
return x*x + y*y + z*z;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
__always_inline float Magnitude2D(void) const { return Sqrt(x*x + y*y); }
|
||||
float MagnitudeSqr2D(void) const { return x*x + y*y; }
|
||||
void Normalise(void);
|
||||
@@ -68,7 +68,7 @@ public:
|
||||
}
|
||||
|
||||
const CVector &operator/=(float right) {
|
||||
right = Invert(right);
|
||||
right = Invert<false>(right);
|
||||
x *= right;
|
||||
y *= right;
|
||||
z *= right;
|
||||
@@ -112,7 +112,8 @@ inline CVector operator*(float left, const CVector &right)
|
||||
|
||||
inline CVector operator/(const CVector &left, float right)
|
||||
{
|
||||
return CVector(left.x / right, left.y / right, left.z / right);
|
||||
right = Invert<false>(right);
|
||||
return CVector(left.x * right, left.y * right, left.z * right);
|
||||
}
|
||||
|
||||
__always_inline float
|
||||
|
||||
@@ -44,7 +44,7 @@ __always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const CV
|
||||
sqc2 vf06,0x0(%0)\n\
|
||||
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
|
||||
#elif defined(DC_SH4)
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
|
||||
dc::mat_load2(mat);
|
||||
mat_trans_single3_nodiv_nomod(in.x, in.y, in.z, out.x, out.y, out.z);
|
||||
#else
|
||||
out = mat * in;
|
||||
@@ -71,7 +71,7 @@ __always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const Rw
|
||||
sqc2 vf06,0x0(%0)\n\
|
||||
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
|
||||
#elif defined(DC_SH4)
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
|
||||
dc::mat_load2(mat);
|
||||
mat_trans_single3_nodiv_nomod(in.x, in.y, in.z, out.x, out.y, out.z);
|
||||
#else
|
||||
out = mat * in;
|
||||
@@ -108,7 +108,7 @@ __always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat,
|
||||
bnez %1,1b\n\
|
||||
": : "r" (out) , "r" (n), "r" (&mat), "r" (in), "r" (stride): "memory");
|
||||
#elif defined(DC_SH4)
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
|
||||
dc::mat_load2(mat);
|
||||
while(n--) {
|
||||
mat_trans_single3_nodiv_nomod(in->x, in->y, in->z, out->x, out->y, out->z);
|
||||
in = reinterpret_cast<const RwV3d *>(reinterpret_cast<const uint8_t *>(in) + stride);
|
||||
|
||||
@@ -1,73 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "src/common_defines.h"
|
||||
#include "rwdc_common.h"
|
||||
|
||||
#include <tuple>
|
||||
#include <dc/matrix.h>
|
||||
|
||||
#ifdef DC_SH4
|
||||
|
||||
#define mat_trans_nodiv_nomod(x, y, z, x2, y2, z2, w2) do { \
|
||||
register float __x __asm__("fr12") = (x); \
|
||||
register float __y __asm__("fr13") = (y); \
|
||||
register float __z __asm__("fr14") = (z); \
|
||||
register float __w __asm__("fr15") = 1.0f; \
|
||||
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
|
||||
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_w_nodiv_nomod(x, y, z, w) do { \
|
||||
register float __x __asm__("fr12") = (x); \
|
||||
register float __y __asm__("fr13") = (y); \
|
||||
register float __z __asm__("fr14") = (z); \
|
||||
register float __w __asm__("fr15") = 1.0f; \
|
||||
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
|
||||
w = __w; \
|
||||
} while(false)
|
||||
|
||||
__always_inline float Fmac(float a, float b, float c) {
|
||||
asm volatile ("fmac fr0, %[floatb], %[floatc]\n"
|
||||
: [floatc] "+f" (c) : "w" (a), [floatb] "f" (b) : );
|
||||
return c;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define mat_trans_nodiv_nomod(x_, y_, z_, x2, y2, z2, w2) do { \
|
||||
vector_t tmp = { x_, y_, z_, 1.0f }; \
|
||||
mat_transform(&tmp, &tmp, 1, 0); \
|
||||
x2 = tmp.x; y2 = tmp.y; z2 = tmp.z; w2 = tmp.w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_w_nodiv_nomod(x_, y_, z_, w_) do { \
|
||||
vector_t tmp = { x_, y_, z_, 1.0f }; \
|
||||
mat_transform(&tmp, &tmp, 1, 0); \
|
||||
w_ = tmp.w; \
|
||||
} while(false)
|
||||
|
||||
__always_inline float Fmac(float a, float b, float c) { return a * b + c; }
|
||||
|
||||
#endif
|
||||
|
||||
__always_inline float Sin(float x) { return __builtin_sinf(x); }
|
||||
__always_inline float Cos(float x) { return __builtin_cosf(x); }
|
||||
__always_inline auto SinCos(float x) { return std::pair { Sin(x), Cos(x) }; }
|
||||
__always_inline float Tan(float x) { return __builtin_tanf(x); }
|
||||
__always_inline float Abs(float x) { return __builtin_fabsf(x); }
|
||||
__always_inline float Sqrt(float x) { return __builtin_sqrtf(x); }
|
||||
__always_inline float RecipSqrt(float x) { return 1.0f / __builtin_sqrtf(x); }
|
||||
__always_inline float Asin(float x) { return __builtin_asinf(x); }
|
||||
__always_inline float Acos(float x) { return __builtin_acosf(x); }
|
||||
__always_inline float Atan(float x) { return __builtin_atanf(x); }
|
||||
__always_inline float Atan2(float y, float x) { return __builtin_atan2f(y, x); }
|
||||
__always_inline float RecipSqrt(float x, float y) { return x / __builtin_sqrtf(y); /*y = RecipSqrt(y); return x * y * y;*/ }
|
||||
__always_inline float Pow(float x, float y) { return __builtin_powf(x, y); }
|
||||
__always_inline float Floor(float x) { return __builtin_floorf(x); }
|
||||
__always_inline float Ceil(float x) { return __builtin_ceilf(x); }
|
||||
__always_inline float Invert(float x) { return (((x) < 0.0f)? -1.0f : 1.0f) * RecipSqrt((x) * (x)); }
|
||||
__always_inline float Div(float x, float y) { return x * Invert(y); }
|
||||
__always_inline float Lerp(float a, float b, float t) { return Fmac(t, (b - a), a); }
|
||||
using namespace dc;
|
||||
|
||||
@@ -13,7 +13,9 @@ enum eWinVersion
|
||||
OS_WINXP,
|
||||
};
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifdef _WIN64
|
||||
#include <windows.h>
|
||||
#elif defined(_WIN32)
|
||||
|
||||
// As long as WITHWINDOWS isn't defined / <Windows.h> isn't included, we only need type definitions so let's include <IntSafe.h>.
|
||||
// NOTE: It's perfectly fine to include <Windows.h> here, but it can increase build size and time in *some* conditions, and maybe substantially in future if we'll use crossplatform.h more.
|
||||
|
||||
@@ -1473,7 +1473,7 @@ CCollision::ProcessLineOfSight(const CColLine &line,
|
||||
point.point = matrix * point.point;
|
||||
point.normal = Multiply3x3(matrix, point.normal);
|
||||
#else
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrix)));
|
||||
dc::mat_load2(matrix);
|
||||
mat_trans_single3_nodiv(point.point.x,
|
||||
point.point.y,
|
||||
point.point.z);
|
||||
@@ -1653,7 +1653,7 @@ CCollision::ProcessVerticalLine(const CColLine &line,
|
||||
point.point = matrix * point.point;
|
||||
point.normal = Multiply3x3(matrix, point.normal);
|
||||
#else
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrix)));
|
||||
dc::mat_load2(matrix);
|
||||
mat_trans_single3_nodiv(point.point.x,
|
||||
point.point.y,
|
||||
point.point.z);
|
||||
@@ -2027,8 +2027,7 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
|
||||
#ifndef DC_SH4
|
||||
matAB *= matrixA;
|
||||
#else
|
||||
mat_load(reinterpret_cast<const matrix_t*>(&matAB));
|
||||
mat_apply(reinterpret_cast<const matrix_t*>(&matrixA));
|
||||
dc::mat_load_apply(matAB, matrixA);
|
||||
#endif
|
||||
|
||||
CColSphere bsphereAB; // bounding sphere of A in B space
|
||||
@@ -2099,8 +2098,7 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
|
||||
#ifndef DC_SH4
|
||||
matBA *= matrixB;
|
||||
#else
|
||||
mat_load(reinterpret_cast<const matrix_t*>(&matBA));
|
||||
mat_apply(reinterpret_cast<const matrix_t*>(&matrixB));
|
||||
dc::mat_load_apply(matBA, matrixB);
|
||||
#endif
|
||||
for(i = 0; i < modelB.numSpheres; i++){
|
||||
s.radius = modelB.spheres[i].radius;
|
||||
@@ -2162,7 +2160,7 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
|
||||
}
|
||||
|
||||
#ifdef DC_SH4
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrixB)));
|
||||
dc::mat_load2(matrixB);
|
||||
#endif
|
||||
for(i = 0; i < numCollisions; i++){
|
||||
#ifndef DC_SH4
|
||||
|
||||
@@ -2042,7 +2042,7 @@ wchar *CControllerConfigManager::GetControllerSettingTextWithOrderNumber(e_Contr
|
||||
// CONTROL_CLASSIC : more like GTA: Liberty City Stories on PlayStation Portable
|
||||
|
||||
//static wchar ActionText[50];
|
||||
static wchar ActionText[24];
|
||||
static wchar ActionText[32];
|
||||
for (int32 i = 0; i < ARRAY_SIZE(ActionText); i++)
|
||||
ActionText[i] = '\0';
|
||||
|
||||
@@ -2066,14 +2066,17 @@ wchar *CControllerConfigManager::GetControllerSettingTextWithOrderNumber(e_Contr
|
||||
static const char* Dreamcast_VehicleLookLeft = "A + Joystick Left";
|
||||
static const char* Dreamcast_VehicleLookRight = "A + Joystick Right";
|
||||
static const char* Dreamcast_CenterCamera = "Double click X";
|
||||
static const char* Dreamcast_CameraUp = "X + Joystick Up";
|
||||
static const char* Dreamcast_CameraDown = "X + Joystick Down";
|
||||
static const char* Dreamcast_CameraLeft = "X + Joystick Left";
|
||||
static const char* Dreamcast_CameraRight = "X + Joystick Right";
|
||||
static const char* Dreamcast_LookBehind = "Right trigger + Left trigger";
|
||||
//static const char* Dreamcast_CameraUp = "X + Joystick Up"; // Not used, commented to saving memory
|
||||
//static const char* Dreamcast_CameraDown = "X + Joystick Down"; // Not used, commented to saving memory
|
||||
//static const char* Dreamcast_CameraLeft = "X + Joystick Left"; // Not used, commented to saving memory
|
||||
//static const char* Dreamcast_CameraRight = "X + Joystick Right"; // Not used, commented to saving memory
|
||||
|
||||
//static const char* Dreamcast_ReplaceMeleeWeapon = "A + D-Pad Down";
|
||||
//static const char* Dreamcast_BothTriggers = "Right Trigger + Left Trigger";
|
||||
static const char* Dreamcast_BothTriggers = "Two Triggers / A + D-Pad Down"; // Not a perfect fix but avoid problem with PED_ANSWER_PHONE
|
||||
static const char* Dreamcast_NotAssigned = "Not Assigned";
|
||||
|
||||
//Dual sticks
|
||||
// Dual Sticks
|
||||
static const char* LefAnalog_Up = "Left Stick Up";
|
||||
static const char* LefAnalog_Down = "Left Stick Down";
|
||||
static const char* LefAnalog_Left = "Left Stick Left";
|
||||
@@ -2084,14 +2087,13 @@ wchar *CControllerConfigManager::GetControllerSettingTextWithOrderNumber(e_Contr
|
||||
static const char* RightAnalog_Left = "Right Stick Left";
|
||||
static const char* RightAnalog_Right = "Right Stick Right";
|
||||
|
||||
//Xbox Specific
|
||||
// Xbox Specific
|
||||
static const char* Xbox_VehicleLookLeft = "LB";
|
||||
static const char* Xbox_VehicleLookRight = "RB";
|
||||
static const char* Xbox_VehicleLookBehind = "RB + LB";
|
||||
static const char* Xbox_Back = "Back";
|
||||
|
||||
|
||||
//PS2 Specific
|
||||
// PS2 Specific
|
||||
static const char* Dreamcast_PS2_VehicleLookLeft = "B + Joystick Left";
|
||||
static const char* Dreamcast_PS2_VehicleLookRight = "B + Joystick Right";
|
||||
static const char* PS2_L1 = "L1";
|
||||
@@ -2125,6 +2127,9 @@ wchar *CControllerConfigManager::GetControllerSettingTextWithOrderNumber(e_Contr
|
||||
case VEHICLE_ACCELERATE:
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_RightTrigger[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case PED_ANSWER_PHONE: // Both Triggers to answer the Phone and switch the Melee Weapon on Dreamcast
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_BothTriggers[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case VEHICLE_CHANGE_RADIO_STATION: // D-Pad Right to switch RADIO on Dreamcast
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_DPad_Right[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
@@ -2213,7 +2218,7 @@ wchar *CControllerConfigManager::GetControllerSettingTextWithOrderNumber(e_Contr
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_DPad_Up[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case VEHICLE_LOOKBEHIND:
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_LookBehind[i]) != '\0' && i < iLimitCopy; i++);
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_BothTriggers[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case NETWORK_TALK: // Not Used on Dreamcast
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_NotAssigned[i]) != '\0' && i < iLimitCopy; i++);
|
||||
@@ -2263,6 +2268,9 @@ wchar *CControllerConfigManager::GetControllerSettingTextWithOrderNumber(e_Contr
|
||||
case VEHICLE_ACCELERATE:
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_RightTrigger[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case PED_ANSWER_PHONE: // Both Triggers to answer the Phone on Dreamcast
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_BothTriggers[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case VEHICLE_CHANGE_RADIO_STATION: // D-Pad Right to switch RADIO on Dreamcast
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_DPad_Right[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
@@ -2401,7 +2409,9 @@ wchar *CControllerConfigManager::GetControllerSettingTextWithOrderNumber(e_Contr
|
||||
case VEHICLE_ACCELERATE:
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_A[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
|
||||
case PED_ANSWER_PHONE: // Both Triggers to answer the Phone on Dreamcast
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_BothTriggers[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case VEHICLE_CHANGE_RADIO_STATION: // D-Pad Right to switch RADIO on Dreamcast
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_DPad_Right[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
@@ -2490,7 +2500,7 @@ wchar *CControllerConfigManager::GetControllerSettingTextWithOrderNumber(e_Contr
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_DPad_Up[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case VEHICLE_LOOKBEHIND:
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_LookBehind[i]) != '\0' && i < iLimitCopy; i++);
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_BothTriggers[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case NETWORK_TALK: // Not Used on Dreamcast
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_NotAssigned[i]) != '\0' && i < iLimitCopy; i++);
|
||||
@@ -2541,6 +2551,9 @@ wchar *CControllerConfigManager::GetControllerSettingTextWithOrderNumber(e_Contr
|
||||
case VEHICLE_ACCELERATE:
|
||||
for (int i = 0; (ActionText[i] = PS2_Cross[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case PED_ANSWER_PHONE: // Both Triggers to answer the Phone on Dreamcast
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_BothTriggers[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case VEHICLE_CHANGE_RADIO_STATION:
|
||||
for (int i = 0; (ActionText[i] = PS2_L1[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
@@ -2629,7 +2642,7 @@ wchar *CControllerConfigManager::GetControllerSettingTextWithOrderNumber(e_Contr
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_DPad_Up[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case VEHICLE_LOOKBEHIND:
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_LookBehind[i]) != '\0' && i < iLimitCopy; i++);
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_BothTriggers[i]) != '\0' && i < iLimitCopy; i++);
|
||||
break;
|
||||
case NETWORK_TALK: // Not Used on Dreamcast
|
||||
for (int i = 0; (ActionText[i] = Dreamcast_NotAssigned[i]) != '\0' && i < iLimitCopy; i++);
|
||||
|
||||
@@ -85,8 +85,6 @@
|
||||
|
||||
#define rwVENDORID_ROCKSTAR 0x0253F2
|
||||
|
||||
#define Max(a,b) ((a) > (b) ? (a) : (b))
|
||||
#define Min(a,b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
// Use this to add const that wasn't there in the original code
|
||||
#define Const const
|
||||
@@ -303,12 +301,8 @@ extern int strncasecmp(const char *str1, const char *str2, size_t len);
|
||||
|
||||
extern wchar *AllocUnicode(const char*src);
|
||||
|
||||
#define Clamp(v, low, high) ((v)<(low) ? (low) : (v)>(high) ? (high) : (v))
|
||||
|
||||
#define Clamp2(v, center, radius) ((v) > (center) ? Min(v, center + radius) : Max(v, center - radius))
|
||||
|
||||
inline float sq(float x) { return x*x; }
|
||||
#define SQR(x) ((x) * (x))
|
||||
__always_inline auto sq(auto x) { return SQR(x); }
|
||||
|
||||
#ifdef __MWERKS__
|
||||
#define M_E 2.71828182845904523536 // e
|
||||
@@ -326,7 +320,10 @@ inline float sq(float x) { return x*x; }
|
||||
#define M_SQRT1_2 0.707106781186547524401 // 1/sqrt(2)
|
||||
#endif
|
||||
|
||||
#define PI (float)M_PI
|
||||
#ifndef DC_SH4
|
||||
#define F_PI M_PI
|
||||
#endif
|
||||
#define PI (float)F_PI
|
||||
#define TWOPI (PI*2)
|
||||
#define HALFPI (PI/2)
|
||||
#define DEGTORAD(x) ((x) * PI / 180.0f)
|
||||
@@ -380,7 +377,10 @@ __inline__ void TRACE(char *f, ...) { } // this is re3 only, and so the function
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef MASTER
|
||||
#ifdef assert
|
||||
#undef assert
|
||||
#endif
|
||||
#if !defined(MASTER)
|
||||
#define assert(_Expression) (void)( (!!(_Expression)) || (re3_assert(#_Expression, __FILE__, __LINE__, __FUNCTION__), 0) )
|
||||
#else
|
||||
#define assert(_Expression) (_Expression)
|
||||
@@ -411,12 +411,7 @@ template<int s, int t> struct check_size {
|
||||
#endif
|
||||
#define BIT(num) (1<<(num))
|
||||
|
||||
#define ABS(a) (((a) < 0) ? (-(a)) : (a))
|
||||
#define norm(value, min, max) (((value) < (min)) ? 0 : (((value) > (max)) ? 1 : (((value) - (min)) / ((max) - (min)))))
|
||||
#define Lerp(norm, min, max) ( (norm) * ((max) - (min)) + (min) )
|
||||
|
||||
#define STRINGIFY(x) #x
|
||||
#define STR(x) STRINGIFY(x)
|
||||
#define CONCAT_(x,y) x##y
|
||||
#define CONCAT(x,y) CONCAT_(x,y)
|
||||
#define ABS(a) Abs(a)
|
||||
|
||||
// we use std::lerp now
|
||||
//#define lerp(norm, min, max) ( (norm) * ((max) - (min)) + (min) )
|
||||
|
||||
@@ -450,8 +450,8 @@ RwBool RwIm3DRenderPrimitive(RwPrimitiveType primType);
|
||||
|
||||
RwBool RwRenderStateGet(RwRenderState state, void *value)
|
||||
{
|
||||
uint32 *uival = (uint32*)value;
|
||||
uint32 fog;
|
||||
uint32_t *uival = (uint32_t*)value;
|
||||
uint32_t fog;
|
||||
switch(state){
|
||||
case rwRENDERSTATETEXTURERASTER: *(void**)value = GetRenderStatePtr(TEXTURERASTER); return true;
|
||||
case rwRENDERSTATETEXTUREADDRESS: *uival = GetRenderState(TEXTUREADDRESS); return true;
|
||||
@@ -497,8 +497,8 @@ RwBool RwRenderStateGet(RwRenderState state, void *value)
|
||||
}
|
||||
RwBool RwRenderStateSet(RwRenderState state, void *value)
|
||||
{
|
||||
uint32 uival = (uintptr)value;
|
||||
uint32 fog;
|
||||
uint32_t uival = (uintptr_t)value;
|
||||
uint32_t fog;
|
||||
switch(state){
|
||||
case rwRENDERSTATETEXTURERASTER: SetRenderStatePtr(TEXTURERASTER, value); return true;
|
||||
case rwRENDERSTATETEXTUREADDRESS: SetRenderState(TEXTUREADDRESS, uival); return true;
|
||||
@@ -545,8 +545,8 @@ RwBool RwRenderStateSet(RwRenderState state, void *value)
|
||||
static rw::MemoryFunctions gMemfuncs;
|
||||
static void *(*real_malloc)(size_t size);
|
||||
static void *(*real_realloc)(void *mem, size_t newSize);
|
||||
static void *mallocWrap(size_t sz, uint32 hint) { if(sz == 0) return nil; return real_malloc(sz); }
|
||||
static void *reallocWrap(void *p, size_t sz, uint32 hint) { return real_realloc(p, sz); }
|
||||
static void *mallocWrap(size_t sz, uint32_t hint) { if(sz == 0) return nil; return real_malloc(sz); }
|
||||
static void *reallocWrap(void *p, size_t sz, uint32_t hint) { return real_realloc(p, sz); }
|
||||
|
||||
|
||||
// WARNING: unused parameters
|
||||
@@ -832,7 +832,7 @@ RwCamera *RwCameraForAllClumpsNotInFrustum(RwCamera *camera, RwInt32 numClumps,
|
||||
RwBool RpMatFXPluginAttach( void ) { registerMatFXPlugin(); return true; }
|
||||
RpAtomic *RpMatFXAtomicEnableEffects( RpAtomic *atomic ) { MatFX::enableEffects(atomic); return atomic; }
|
||||
RpMatFXMaterialFlags RpMatFXMaterialGetEffects( const RpMaterial *material ){ return (RpMatFXMaterialFlags)MatFX::getEffects(material); }
|
||||
RpMaterial *RpMatFXMaterialSetEffects( RpMaterial *material, RpMatFXMaterialFlags flags ) { MatFX::setEffects(material, (uint32)flags); return material; }
|
||||
RpMaterial *RpMatFXMaterialSetEffects( RpMaterial *material, RpMatFXMaterialFlags flags ) { MatFX::setEffects(material, (uint32_t)flags); return material; }
|
||||
RpMaterial *RpMatFXMaterialSetupEnvMap( RpMaterial *material, RwTexture *texture, RwFrame *frame, RwBool useFrameBufferAlpha, RwReal coef ) {
|
||||
MatFX *mfx = MatFX::get(material);
|
||||
mfx->setEnvTexture(texture);
|
||||
|
||||
@@ -4,7 +4,7 @@ CMatrix::CMatrix(CMatrix const &m)
|
||||
{
|
||||
m_attachment = nil;
|
||||
m_hasRwMatrix = false;
|
||||
*this = m;
|
||||
mat_copy(*this, m);
|
||||
}
|
||||
|
||||
CMatrix::CMatrix(RwMatrix *matrix, bool owner)
|
||||
@@ -75,7 +75,7 @@ CMatrix::UpdateRW(void)
|
||||
void
|
||||
CMatrix::operator=(CMatrix const &rhs)
|
||||
{
|
||||
memcpy(this, &rhs, sizeof(f));
|
||||
mat_copy(*this, rhs);
|
||||
if (m_attachment)
|
||||
UpdateRW();
|
||||
}
|
||||
@@ -83,7 +83,7 @@ CMatrix::operator=(CMatrix const &rhs)
|
||||
void
|
||||
CMatrix::CopyOnlyMatrix(const CMatrix &other)
|
||||
{
|
||||
memcpy(this, &other, sizeof(f));
|
||||
mat_copy(*this, other);
|
||||
}
|
||||
|
||||
CMatrix &
|
||||
@@ -358,12 +358,14 @@ CMatrix::RotateZ(float z)
|
||||
void
|
||||
CMatrix::Rotate(float x, float y, float z)
|
||||
{
|
||||
float cX = Cos(x);
|
||||
float sX = Sin(x);
|
||||
float cY = Cos(y);
|
||||
float sY = Sin(y);
|
||||
float cZ = Cos(z);
|
||||
float sZ = Sin(z);
|
||||
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
|
||||
dc::mat_load2(reinterpret_cast<matrix_t *>(this));
|
||||
mat_rotate(x, y, z);
|
||||
dc::mat_store2(reinterpret_cast<matrix_t *>(this));
|
||||
#else
|
||||
auto [sX, cX] = SinCos(x);
|
||||
auto [sY, cY] = SinCos(y);
|
||||
auto [sZ, cZ] = SinCos(z);
|
||||
|
||||
float rx = this->rx;
|
||||
float ry = this->ry;
|
||||
@@ -388,6 +390,20 @@ CMatrix::Rotate(float x, float y, float z)
|
||||
float z2 = sZ * sY - (cZ * sX) * cY;
|
||||
float z3 = cX * cY;
|
||||
|
||||
#if !defined(DC_TEXCONV) && !defined(DC_SIM)
|
||||
this->rx = fipr(x1, y1, z1, 0, rx, ry, rz, 0);
|
||||
this->ry = fipr(x2, y2, z2, 0, rx, ry, rz, 0);
|
||||
this->rz = fipr(x3, y3, z3, 0, rx, ry, rz, 0);
|
||||
this->fx = fipr(x1, y1, z1, 0, ux, uy, uz, 0);
|
||||
this->fy = fipr(x2, y2, z2, 0, ux, uy, uz, 0);
|
||||
this->fz = fipr(x3, y3, z3, 0, ux, uy, uz, 0);
|
||||
this->ux = fipr(x1, y1, z1, 0, ax, ay, az, 0);
|
||||
this->uy = fipr(x2, y2, z2, 0, ax, ay, az, 0);
|
||||
this->uz = fipr(x3, y3, z3, 0, ax, ay, az, 0);
|
||||
this->px = fipr(x1, y1, z1, 0, px, py, pz, 0);
|
||||
this->py = fipr(x2, y2, z2, 0, px, py, pz, 0);
|
||||
this->pz = fipr(x3, y3, z3, 0, px, py, pz, 0);
|
||||
#else
|
||||
this->rx = x1 * rx + y1 * ry + z1 * rz;
|
||||
this->ry = x2 * rx + y2 * ry + z2 * rz;
|
||||
this->rz = x3 * rx + y3 * ry + z3 * rz;
|
||||
@@ -400,6 +416,8 @@ CMatrix::Rotate(float x, float y, float z)
|
||||
this->px = x1 * px + y1 * py + z1 * pz;
|
||||
this->py = x2 * px + y2 * py + z2 * pz;
|
||||
this->pz = x3 * px + y3 * py + z3 * pz;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
CMatrix &
|
||||
@@ -429,9 +447,7 @@ operator*(const CMatrix &m1, const CMatrix &m2)
|
||||
// TODO: VU0 code
|
||||
CMatrix out;
|
||||
#if defined(RW_DC)
|
||||
mat_load(reinterpret_cast<const matrix_t *>(&m1));
|
||||
mat_apply(reinterpret_cast<const matrix_t *>(&m2));
|
||||
mat_store(reinterpret_cast<matrix_t *>(&out));
|
||||
mat_mult(out, m1, m2);
|
||||
#else
|
||||
out.rx = m1.rx * m2.rx + m1.fx * m2.ry + m1.ux * m2.rz;
|
||||
out.ry = m1.ry * m2.rx + m1.fy * m2.ry + m1.uy * m2.rz;
|
||||
|
||||
@@ -43,6 +43,8 @@ public:
|
||||
SetScale(scale);
|
||||
}
|
||||
~CMatrix(void);
|
||||
operator matrix_t *() { return reinterpret_cast<matrix_t *>(this); }
|
||||
operator const matrix_t *() const { return reinterpret_cast<const matrix_t *>(this); }
|
||||
void Attach(RwMatrix *matrix, bool owner = false);
|
||||
void AttachRW(RwMatrix *matrix, bool owner = false);
|
||||
void Detach(void);
|
||||
@@ -82,13 +84,12 @@ public:
|
||||
void Scale(float sx, float sy, float sz)
|
||||
{
|
||||
for (int i = 0; i < 3; i++){
|
||||
f[i][0] *= sx;
|
||||
f[i][1] *= sy;
|
||||
f[i][2] *= sz;
|
||||
f[i][0] *= sx;
|
||||
f[i][1] *= sy;
|
||||
f[i][2] *= sz;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void SetRotateXOnly(float angle);
|
||||
void SetRotateYOnly(float angle);
|
||||
void SetRotateZOnly(float angle);
|
||||
@@ -125,11 +126,22 @@ CMatrix Invert(const CMatrix &matrix);
|
||||
CMatrix operator*(const CMatrix &m1, const CMatrix &m2);
|
||||
inline CVector MultiplyInverse(const CMatrix &mat, const CVector &vec)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
CVector v(vec.x - mat.px, vec.y - mat.py, vec.z - mat.pz);
|
||||
return CVector(
|
||||
mat.rx * v.x + mat.ry * v.y + mat.rz * v.z,
|
||||
mat.fx * v.x + mat.fy * v.y + mat.fz * v.z,
|
||||
mat.ux * v.x + mat.uy * v.y + mat.uz * v.z);
|
||||
#else
|
||||
register float x asm(KOS_FPARG(0)) = vec.x - mat.px;
|
||||
register float y asm(KOS_FPARG(1)) = vec.y - mat.py;
|
||||
register float z asm(KOS_FPARG(2)) = vec.z - mat.pz;
|
||||
return CVector(
|
||||
fipr(x, y, z, 0.0f, mat.rx, mat.ry, mat.rz, 0.0f),
|
||||
fipr(x, y, z, 0.0f, mat.fx, mat.fy, mat.fz, 0.0f),
|
||||
fipr(x, y, z, 0.0f, mat.ux, mat.uy, mat.uz, 0.0f)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -39,10 +39,14 @@ CQuaternion::Slerp(const CQuaternion &q1, const CQuaternion &q2, float theta, fl
|
||||
void
|
||||
CQuaternion::Multiply(const CQuaternion &q1, const CQuaternion &q2)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
|
||||
y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
|
||||
z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
|
||||
w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
|
||||
#else
|
||||
quat_mult(*this, q1, q2);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
@@ -51,9 +55,16 @@ CQuaternion::Get(RwV3d *axis, float *angle)
|
||||
*angle = Acos(w);
|
||||
float s = Sin(*angle);
|
||||
|
||||
#ifndef DC_SH4
|
||||
axis->x = x * (1.0f / s);
|
||||
axis->y = y * (1.0f / s);
|
||||
axis->z = z * (1.0f / s);
|
||||
#else
|
||||
float invS = dc::Invert<true, false>(s);
|
||||
axis->x = x * invS;
|
||||
axis->y = y * invS;
|
||||
axis->z = z * invS;
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
@@ -104,7 +115,7 @@ CQuaternion::Set(const RwMatrix &matrix)
|
||||
if (f >= 0.0f) {
|
||||
s = Sqrt(f + 1.0f);
|
||||
w = 0.5f * s;
|
||||
m = 0.5f / s;
|
||||
m = Div<true, false>(0.5f, s);
|
||||
x = (matrix.up.z - matrix.at.y) * m;
|
||||
y = (matrix.at.x - matrix.right.z) * m;
|
||||
z = (matrix.right.y - matrix.up.x) * m;
|
||||
@@ -115,7 +126,7 @@ CQuaternion::Set(const RwMatrix &matrix)
|
||||
if (f >= 0.0f) {
|
||||
s = Sqrt(f + 1.0f);
|
||||
x = 0.5f * s;
|
||||
m = 0.5f / s;
|
||||
m = Div<true, false>(0.5f, s);
|
||||
y = (matrix.up.x + matrix.right.y) * m;
|
||||
z = (matrix.at.x + matrix.right.z) * m;
|
||||
w = (matrix.up.z - matrix.at.y) * m;
|
||||
@@ -126,7 +137,7 @@ CQuaternion::Set(const RwMatrix &matrix)
|
||||
if (f >= 0.0f) {
|
||||
s = Sqrt(f + 1.0f);
|
||||
y = 0.5f * s;
|
||||
m = 0.5f / s;
|
||||
m = Div<true, false>(0.5f, s);
|
||||
w = (matrix.at.x - matrix.right.z) * m;
|
||||
x = (matrix.up.x - matrix.right.y) * m;
|
||||
z = (matrix.at.y + matrix.up.z) * m;
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include "src/common_defines.h"
|
||||
#include "rwdc_common.h"
|
||||
|
||||
// TODO: actually implement this
|
||||
class CQuaternion
|
||||
{
|
||||
@@ -8,8 +11,24 @@ public:
|
||||
CQuaternion(void) {}
|
||||
CQuaternion(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
|
||||
|
||||
float Magnitude(void) const { return Sqrt(x*x + y*y + z*z + w*w); }
|
||||
float MagnitudeSqr(void) const { return x*x + y*y + z*z + w*w; }
|
||||
operator quaternion_t *() { return reinterpret_cast<quaternion_t *>(this); }
|
||||
operator const quaternion_t *() const { return reinterpret_cast<const quaternion_t *>(this); }
|
||||
operator quaternion_t &() { return *reinterpret_cast<quaternion_t *>(this); }
|
||||
operator const quaternion_t &() const { return *reinterpret_cast<const quaternion_t *>(this); }
|
||||
float Magnitude(void) const {
|
||||
#ifndef DC_SH4
|
||||
return Sqrt(x*x + y*y + z*z + w*w);
|
||||
#else
|
||||
return Sqrt(fipr_magnitude_sqr(x, y, z, w));
|
||||
#endif
|
||||
}
|
||||
float MagnitudeSqr(void) const {
|
||||
#ifndef DC_SH4
|
||||
return x*x + y*y + z*z + w*w;
|
||||
#else
|
||||
return fipr_magnitude_sqr(x, y, z, w);
|
||||
#endif
|
||||
}
|
||||
void Normalise(void);
|
||||
void Multiply(const CQuaternion &q1, const CQuaternion &q2);
|
||||
void Invert(void){ // Conjugate would have been a better name
|
||||
@@ -43,10 +62,11 @@ public:
|
||||
}
|
||||
|
||||
const CQuaternion &operator/=(float right) {
|
||||
x /= right;
|
||||
y /= right;
|
||||
z /= right;
|
||||
w /= right;
|
||||
right = dc::Invert<false>(right);
|
||||
x *= right;
|
||||
y *= right;
|
||||
z *= right;
|
||||
w *= right;
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -66,7 +86,11 @@ public:
|
||||
inline float
|
||||
DotProduct(const CQuaternion &q1, const CQuaternion &q2)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
return q1.x*q2.x + q1.y*q2.y + q1.z*q2.z + q1.w*q2.w;
|
||||
#else
|
||||
return fipr(q1.x, q1.y, q1.z, q1.w, q2.x, q2.y, q2.z, q2.w);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline CQuaternion operator+(const CQuaternion &left, const CQuaternion &right)
|
||||
@@ -91,5 +115,6 @@ inline CQuaternion operator*(float left, const CQuaternion &right)
|
||||
|
||||
inline CQuaternion operator/(const CQuaternion &left, float right)
|
||||
{
|
||||
return CQuaternion(left.x / right, left.y / right, left.z / right, left.w / right);
|
||||
right = Invert<false>(right);
|
||||
return CQuaternion(left.x * right, left.y * right, left.z * right, left.w * right);
|
||||
}
|
||||
|
||||
@@ -3,6 +3,10 @@
|
||||
void
|
||||
CVector::Normalise(void)
|
||||
{
|
||||
#ifdef DC_SH4_BROKEN
|
||||
// TODO: This needs to handle zero vectors here
|
||||
vec3f_normalize(x, y, z);
|
||||
#else
|
||||
float sq = MagnitudeSqr();
|
||||
if (sq > 0.0f) {
|
||||
float invsqrt = RecipSqrt(sq);
|
||||
@@ -11,6 +15,7 @@ CVector::Normalise(void)
|
||||
z *= invsqrt;
|
||||
} else
|
||||
x = 1.0f;
|
||||
#endif
|
||||
}
|
||||
|
||||
CVector
|
||||
@@ -22,25 +27,49 @@ CrossProduct(const CVector &v1, const CVector &v2)
|
||||
CVector
|
||||
Multiply3x3(const CMatrix &mat, const CVector &vec)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
// TODO: VU0 code
|
||||
return CVector(mat.rx * vec.x + mat.fx * vec.y + mat.ux * vec.z,
|
||||
mat.ry * vec.x + mat.fy * vec.y + mat.uy * vec.z,
|
||||
mat.rz * vec.x + mat.fz * vec.y + mat.uz * vec.z);
|
||||
#else
|
||||
CVector out;
|
||||
dc::mat_load2(mat);
|
||||
mat_trans_normal3_nomod(vec.x, vec.y, vec.z,
|
||||
out.x, out.y, out.z);
|
||||
return out;
|
||||
#endif
|
||||
}
|
||||
|
||||
CVector
|
||||
Multiply3x3(const CVector &vec, const CMatrix &mat)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
return CVector(mat.rx * vec.x + mat.ry * vec.y + mat.rz * vec.z,
|
||||
mat.fx * vec.x + mat.fy * vec.y + mat.fz * vec.z,
|
||||
mat.ux * vec.x + mat.uy * vec.y + mat.uz * vec.z);
|
||||
#else
|
||||
CVector out;
|
||||
dc::mat_load2(mat);
|
||||
mat_transpose();
|
||||
mat_trans_normal3_nomod(vec.x, vec.y, vec.z,
|
||||
out.x, out.y, out.z);
|
||||
return out;
|
||||
#endif
|
||||
}
|
||||
|
||||
CVector
|
||||
operator*(const CMatrix &mat, const CVector &vec)
|
||||
{
|
||||
#ifdef DC_SH4
|
||||
CVector out;
|
||||
dc::mat_load2(mat);
|
||||
mat_trans_single3_nodiv_nomod(vec.x, vec.y, vec.z, out.x, out.y, out.z);
|
||||
return out;
|
||||
#else
|
||||
// TODO: VU0 code
|
||||
return CVector(mat.rx * vec.x + mat.fx * vec.y + mat.ux * vec.z + mat.px,
|
||||
mat.ry * vec.x + mat.fy * vec.y + mat.uy * vec.z + mat.py,
|
||||
mat.rz * vec.x + mat.fz * vec.y + mat.uz * vec.z + mat.pz);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -18,11 +18,25 @@ public:
|
||||
z = v.z;
|
||||
}
|
||||
// (0,1,0) means no rotation. So get right vector and its atan
|
||||
float Heading(void) const { return Atan2(-x, y); }
|
||||
float Magnitude(void) const { return Sqrt(x*x + y*y + z*z); }
|
||||
float MagnitudeSqr(void) const { return x*x + y*y + z*z; }
|
||||
float Magnitude2D(void) const { return Sqrt(x*x + y*y); }
|
||||
float MagnitudeSqr2D(void) const { return x*x + y*y; }
|
||||
__always_inline float Heading(void) const { return Atan2(-x, y); }
|
||||
__always_inline float Magnitude(void) const {
|
||||
#ifdef DC_SH4
|
||||
float w;
|
||||
vec3f_length(x, y, z, w);
|
||||
return w;
|
||||
#else
|
||||
return Sqrt(x*x + y*y + z*z);
|
||||
#endif
|
||||
}
|
||||
__always_inline float MagnitudeSqr(void) const {
|
||||
#ifdef DC_SH4
|
||||
return fipr_magnitude_sqr(x, y,z, 0.0f);
|
||||
#else
|
||||
return x*x + y*y + z*z;
|
||||
#endif
|
||||
}
|
||||
__always_inline float Magnitude2D(void) const { return Sqrt(x*x + y*y); }
|
||||
float MagnitudeSqr2D(void) const { return x*x + y*y; }
|
||||
void Normalise(void);
|
||||
|
||||
void Normalise2D(void) {
|
||||
@@ -103,7 +117,11 @@ inline CVector operator/(const CVector &left, float right)
|
||||
inline float
|
||||
DotProduct(const CVector &v1, const CVector &v2)
|
||||
{
|
||||
return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z;
|
||||
#ifdef DC_SH4
|
||||
return fipr(v1.x, v1.y, v1.z, 0.0f, v2.x, v2.y, v2.z, 0.0f);
|
||||
#else
|
||||
return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z;
|
||||
#endif
|
||||
}
|
||||
|
||||
CVector CrossProduct(const CVector &v1, const CVector &v2);
|
||||
@@ -111,7 +129,13 @@ CVector CrossProduct(const CVector &v1, const CVector &v2);
|
||||
inline float
|
||||
Distance(const CVector &v1, const CVector &v2)
|
||||
{
|
||||
return (v2 - v1).Magnitude();
|
||||
float w;
|
||||
#ifdef DC_SH4
|
||||
vec3f_distance(v1.x, v1.y, v1.z, v2.x, v2.y, v2.z, w);
|
||||
return w;
|
||||
#else
|
||||
return (v2 - v1).Magnitude();
|
||||
#endif
|
||||
}
|
||||
|
||||
inline float
|
||||
|
||||
@@ -50,7 +50,7 @@ __always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const CV
|
||||
sqc2 vf06,0x0(%0)\n\
|
||||
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
|
||||
#elif defined(DC_SH4)
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
|
||||
dc::mat_load2(mat);
|
||||
mat_trans_single3_nodiv_nomod(in.x, in.y, in.z, out.x, out.y, out.z);
|
||||
#else
|
||||
out = mat * in;
|
||||
@@ -77,7 +77,7 @@ __always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const Rw
|
||||
sqc2 vf06,0x0(%0)\n\
|
||||
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
|
||||
#elif defined(DC_SH4)
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
|
||||
dc::mat_load2(mat);
|
||||
mat_trans_single3_nodiv_nomod(in.x, in.y, in.z, out.x, out.y, out.z);
|
||||
#else
|
||||
out = mat * in;
|
||||
@@ -86,7 +86,7 @@ __always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const Rw
|
||||
|
||||
__always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride)
|
||||
{
|
||||
#ifdef GTA_PS3
|
||||
#ifdef GTA_PS2
|
||||
__asm__ __volatile__("\n\
|
||||
paddub $3,%4,$0\n\
|
||||
lqc2 vf02,0x0(%2)\n\
|
||||
@@ -114,7 +114,7 @@ __always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat,
|
||||
bnez %1,1b\n\
|
||||
": : "r" (out) , "r" (n), "r" (&mat), "r" (in), "r" (stride): "memory");
|
||||
#elif defined(DC_SH4)
|
||||
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
|
||||
dc::mat_load2(mat);
|
||||
while(n--) {
|
||||
mat_trans_single3_nodiv_nomod(in->x, in->y, in->z, out->x, out->y, out->z);
|
||||
in = reinterpret_cast<const RwV3d *>(reinterpret_cast<const uint8_t *>(in) + stride);
|
||||
@@ -137,7 +137,6 @@ __always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat,
|
||||
lqc2 vf03,0x10(%2)\n\
|
||||
lqc2 vf04,0x20(%2)\n\
|
||||
lqc2 vf05,0x30(%2)\n\
|
||||
lqc2 vf01,0x0(%3)\n\
|
||||
nop\n\
|
||||
1: vmulax.xyz ACC, vf02,vf01\n\
|
||||
vmadday.xyz ACC, vf03,vf01\n\
|
||||
|
||||
@@ -1,46 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "src/common_defines.h"
|
||||
#include "rwdc_common.h"
|
||||
|
||||
#include <dc/matrix.h>
|
||||
|
||||
#ifdef DC_SH4
|
||||
|
||||
#define mat_trans_nodiv_nomod(x, y, z, x2, y2, z2, w2) do { \
|
||||
register float __x __asm__("fr12") = (x); \
|
||||
register float __y __asm__("fr13") = (y); \
|
||||
register float __z __asm__("fr14") = (z); \
|
||||
register float __w __asm__("fr15") = 1.0f; \
|
||||
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
|
||||
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
|
||||
} while(false)
|
||||
|
||||
#else
|
||||
|
||||
#define mat_trans_nodiv_nomod(x_, y_, z_, x2, y2, z2, w2) do { \
|
||||
vector_t tmp = { x_, y_, z_, 1.0f }; \
|
||||
mat_transform(&tmp, &tmp, 1, 0); \
|
||||
x2 = tmp.x; y2 = tmp.y; z2 = tmp.z; w2 = tmp.w; \
|
||||
} while(false)
|
||||
#endif
|
||||
|
||||
|
||||
// wrapper around float versions of functions
|
||||
// in gta they are in CMaths but that makes the code rather noisy
|
||||
|
||||
inline float Sin(float x) { return sinf(x); }
|
||||
inline float Asin(float x) { return asinf(x); }
|
||||
inline float Cos(float x) { return cosf(x); }
|
||||
inline float Acos(float x) { return acosf(x); }
|
||||
inline float Tan(float x) { return tanf(x); }
|
||||
inline float Atan(float x) { return atanf(x); }
|
||||
inline float Atan2(float y, float x) { return atan2f(y, x); }
|
||||
inline float Abs(float x) { return fabsf(x); }
|
||||
inline float Sqrt(float x) { return sqrtf(x); }
|
||||
inline float RecipSqrt(float x, float y) { return x/Sqrt(y); }
|
||||
inline float RecipSqrt(float x) { return RecipSqrt(1.0f, x); }
|
||||
inline float Pow(float x, float y) { return powf(x, y); }
|
||||
inline float Floor(float x) { return floorf(x); }
|
||||
inline float Ceil(float x) { return ceilf(x); }
|
||||
using namespace dc;
|
||||
|
||||
@@ -17,7 +17,7 @@ enum eWinVersion
|
||||
char *_strdate(char *buf);
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
// As long as WITHWINDOWS isn't defined / <Windows.h> isn't included, we only need type definitions so let's include <IntSafe.h>.
|
||||
// NOTE: It's perfectly fine to include <Windows.h> here, but it can increase build size and time in *some* conditions, and maybe substantially in future if we'll use crossplatform.h more.
|
||||
|
||||
2
vendor/dca3-kos
vendored
2
vendor/dca3-kos
vendored
Submodule vendor/dca3-kos updated: 5f69d048aa...a270608762
45
vendor/emu/lxdream/tacore.cpp
vendored
45
vendor/emu/lxdream/tacore.cpp
vendored
@@ -182,7 +182,8 @@ struct pvr2_ta_status {
|
||||
int32_t state;
|
||||
int32_t width, height; /* Tile resolution, ie 20x15 */
|
||||
int32_t tilelist_dir; /* Growth direction of the tilelist, 0 = up, 1 = down */
|
||||
uint32_t tilelist_size; /* Size of the tilelist segments */
|
||||
// uint32_t tilelist_size; /* Size of the tilelist segments */
|
||||
// this needs to be current_tile_size
|
||||
uint32_t tilelist_start; /* Initial address of the tilelist */
|
||||
uint32_t polybuf_start; /* Initial bank address of the polygon buffer (ie &0x00F00000) */
|
||||
int32_t current_vertex_type;
|
||||
@@ -208,6 +209,9 @@ struct pvr2_ta_status {
|
||||
struct tile_bounds last_triangle_bounds;
|
||||
struct pvr2_ta_vertex poly_vertex[8];
|
||||
uint32_t debug_output;
|
||||
|
||||
bool modifier_last_volume;
|
||||
struct tile_bounds modifier_bounds;
|
||||
};
|
||||
|
||||
static struct pvr2_ta_status ta_status;
|
||||
@@ -253,13 +257,9 @@ void lxd_ta_init(u8* vram) {
|
||||
ta_status.clip.y2 = ta_status.height-1;
|
||||
uint32_t control = TA_ALLOC_CTRL; //MMIO_READ( PVR2, TA_TILECFG );
|
||||
ta_status.tilelist_dir = (control >> 20) & 0x01;
|
||||
ta_status.tilelist_size = tilematrix_sizes[ (control & 0x03) ];
|
||||
TA_ISP_CURRENT = TA_ISP_BASE;
|
||||
//MMIO_WRITE( PVR2, TA_POLYPOS, MMIO_READ( PVR2, TA_POLYBASE ) );
|
||||
uint32_t plistpos = TA_NEXT_OPB_INIT >> 2; //MMIO_READ( PVR2, TA_LISTBASE )
|
||||
if( ta_status.tilelist_dir == TA_GROW_DOWN ) {
|
||||
plistpos -= ta_status.tilelist_size;
|
||||
}
|
||||
TA_NEXT_OPB = plistpos;
|
||||
//MMIO_WRITE( PVR2, TA_LISTPOS, plistpos );
|
||||
ta_status.tilelist_start = plistpos;
|
||||
@@ -433,7 +433,9 @@ static uint32_t ta_alloc_tilelist( uint32_t reference ) {
|
||||
uint32_t limit = TA_OL_LIMIT >> 2;//MMIO_READ( PVR2, TA_LISTEND ) >> 2;
|
||||
uint32_t newposn;
|
||||
if( ta_status.tilelist_dir == TA_GROW_DOWN ) {
|
||||
newposn = posn - ta_status.tilelist_size;
|
||||
// printf("**TA WARNING**: Allocating tilelist in GROW_DOWN mode\n");
|
||||
posn -= ta_status.current_tile_size;
|
||||
newposn = posn;
|
||||
if( posn == limit ) {
|
||||
PVRRAM(posn<<2) = 0xF0000000;
|
||||
PVRRAM(reference) = 0xE0000000 | (posn<<2);
|
||||
@@ -442,7 +444,7 @@ static uint32_t ta_alloc_tilelist( uint32_t reference ) {
|
||||
PVRRAM(reference) = 0xE0000000 | (posn<<2);
|
||||
return TA_NO_ALLOC;
|
||||
} else if( newposn <= limit ) {
|
||||
} else if( newposn <= (limit + ta_status.tilelist_size) ) {
|
||||
} else if( newposn <= (limit + ta_status.current_tile_size) ) {
|
||||
// asic_RaiseInterrupt(holly_MATR_NOMEM);
|
||||
pvr_queue_interrupt(ASIC_EVT_PVR_OPB_OUTOFMEM);
|
||||
printf("TA error: holly_MATR_NOMEM. Interrupt raised\n");
|
||||
@@ -456,7 +458,7 @@ static uint32_t ta_alloc_tilelist( uint32_t reference ) {
|
||||
PVRRAM(reference) = 0xE0000000 | (posn<<2);
|
||||
return posn << 2;
|
||||
} else {
|
||||
newposn = posn + ta_status.tilelist_size;
|
||||
newposn = posn + ta_status.current_tile_size;
|
||||
if( posn == limit ) {
|
||||
PVRRAM(posn<<2) = 0xF0000000;
|
||||
PVRRAM(reference) = 0xE0000000 | (posn<<2);
|
||||
@@ -465,7 +467,7 @@ static uint32_t ta_alloc_tilelist( uint32_t reference ) {
|
||||
PVRRAM(reference) = 0xE0000000 | (posn<<2);
|
||||
return TA_NO_ALLOC;
|
||||
} else if( newposn >= limit ) {
|
||||
} else if( newposn >= (limit - ta_status.tilelist_size) ) {
|
||||
} else if( newposn >= (limit - ta_status.current_tile_size) ) {
|
||||
// asic_RaiseInterrupt(holly_MATR_NOMEM);
|
||||
pvr_queue_interrupt(ASIC_EVT_PVR_OPB_OUTOFMEM);
|
||||
printf("TA error: holly_MATR_NOMEM. Interrupt raised\n");
|
||||
@@ -621,6 +623,17 @@ static void ta_commit_polygon( ) {
|
||||
if( polygon_bound.y1 < 0 ) polygon_bound.y1 = 0;
|
||||
if( polygon_bound.y2 >= ta_status.height ) polygon_bound.y2 = ta_status.height-1;
|
||||
|
||||
if (ta_status.current_vertex_type == TA_VERTEX_MOD_VOLUME) {
|
||||
ta_status.modifier_bounds.x1 = MIN(ta_status.modifier_bounds.x1, polygon_bound.x1);
|
||||
ta_status.modifier_bounds.x2 = MAX(ta_status.modifier_bounds.x2, polygon_bound.x2);
|
||||
ta_status.modifier_bounds.y1 = MIN(ta_status.modifier_bounds.y1, polygon_bound.y1);
|
||||
ta_status.modifier_bounds.y2 = MAX(ta_status.modifier_bounds.y2, polygon_bound.y2);
|
||||
|
||||
if (ta_status.modifier_last_volume) {
|
||||
polygon_bound = ta_status.modifier_bounds;
|
||||
}
|
||||
}
|
||||
|
||||
/* Set the "single tile" flag if it's entirely contained in 1 tile */
|
||||
if( polygon_bound.x1 == polygon_bound.x2 &&
|
||||
polygon_bound.y1 == polygon_bound.y2 ) {
|
||||
@@ -843,6 +856,14 @@ static void ta_parse_modifier_context( union ta_data *data ) {
|
||||
ta_status.vertex_count = 0;
|
||||
ta_status.max_vertex = 3;
|
||||
ta_status.poly_pointer = 0;
|
||||
|
||||
if (ta_status.modifier_last_volume) {
|
||||
ta_status.modifier_bounds.x1 = INT_MAX/32;
|
||||
ta_status.modifier_bounds.y1 = INT_MAX/32;
|
||||
ta_status.modifier_bounds.x2 = -1;
|
||||
ta_status.modifier_bounds.y2 = -1;
|
||||
}
|
||||
ta_status.modifier_last_volume = data[0].i & TA_POLYCMD_FULLMOD;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1006,7 +1027,11 @@ static void ta_parse_vertex( union ta_data *data ) {
|
||||
vertex++;
|
||||
vertex->x = data[7].f;
|
||||
ta_status.vertex_count += 2;
|
||||
ta_status.state = STATE_EXPECT_VERTEX_BLOCK2;
|
||||
if (ta_status.current_vertex_type == TA_VERTEX_SPRITE || ta_status.current_vertex_type == TA_VERTEX_TEX_SPRITE) {
|
||||
ta_status.state = STATE_EXPECT_END_VERTEX_BLOCK2;
|
||||
} else {
|
||||
ta_status.state = STATE_EXPECT_VERTEX_BLOCK2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ta_status.vertex_count++;
|
||||
|
||||
12
vendor/emu/refsw/TexUtils.cpp
vendored
12
vendor/emu/refsw/TexUtils.cpp
vendored
@@ -14,9 +14,9 @@
|
||||
#endif
|
||||
|
||||
u32 detwiddle[2][11][1024];
|
||||
u8 BM_SIN90[256];
|
||||
u8 BM_COS90[256];
|
||||
u8 BM_COS360[256];
|
||||
s8 BM_SIN90[256];
|
||||
s8 BM_COS90[256];
|
||||
s8 BM_COS360[256];
|
||||
|
||||
//input : address in the yyyyyxxxxx format
|
||||
//output : address in the xyxyxyxy format
|
||||
@@ -70,9 +70,9 @@ void BuildTables()
|
||||
}
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
BM_SIN90[i] = 255 * sinf((i / 256.0f) * (M_PI / 2));
|
||||
BM_COS90[i] = 255 * cosf((i / 256.0f) * (M_PI / 2));
|
||||
BM_COS360[i] = 255 * cosf((i / 256.0f) * (2 * M_PI));
|
||||
BM_SIN90[i] = 127 * sinf((i / 256.0f) * (M_PI / 2));
|
||||
BM_COS90[i] = 127 * cosf((i / 256.0f) * (M_PI / 2));
|
||||
BM_COS360[i] = 127 * cosf((i / 256.0f) * (2 * M_PI));
|
||||
}
|
||||
|
||||
}
|
||||
6
vendor/emu/refsw/TexUtils.h
vendored
6
vendor/emu/refsw/TexUtils.h
vendored
@@ -8,9 +8,9 @@
|
||||
#include <algorithm>
|
||||
|
||||
extern u32 detwiddle[2][11][1024];
|
||||
extern u8 BM_SIN90[256];
|
||||
extern u8 BM_COS90[256];
|
||||
extern u8 BM_COS360[256];
|
||||
extern s8 BM_SIN90[256];
|
||||
extern s8 BM_COS90[256];
|
||||
extern s8 BM_COS360[256];
|
||||
|
||||
void BuildTables();
|
||||
|
||||
|
||||
1456
vendor/emu/refsw/gentable.h
vendored
Normal file
1456
vendor/emu/refsw/gentable.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
90
vendor/emu/refsw/gentable.py
vendored
Normal file
90
vendor/emu/refsw/gentable.py
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
|
||||
import itertools
|
||||
|
||||
def generate_table(name, parameters):
|
||||
"""
|
||||
Generates C++ code for PixelFlush_tsp_table given parameter ranges.
|
||||
:param parameters: tuple of ints, the range for each template parameter
|
||||
:return: string containing the C++ table declaration and initializer
|
||||
"""
|
||||
num_params = len(parameters)
|
||||
# Construct the table dimensions
|
||||
dims = ''.join(f'[{p}]' for p in parameters)
|
||||
# Start building the initializer as a list of strings
|
||||
lines = []
|
||||
|
||||
def recurse(level, indices, indent):
|
||||
if level == num_params:
|
||||
# Leaf: generate function pointer
|
||||
params_list = ', '.join(str(i) for i in indices)
|
||||
lines.append(f"{indent}&{name}<{params_list}>,")
|
||||
else:
|
||||
# Open brace for this dimension
|
||||
lines.append(f"{indent}{{")
|
||||
for i in range(parameters[level]):
|
||||
recurse(level + 1, indices + [i], indent + ' ')
|
||||
# Close brace
|
||||
if level != 0:
|
||||
lines.append(f"{indent}}},")
|
||||
else:
|
||||
lines.append(f"{indent}}}")
|
||||
|
||||
# Table declaration
|
||||
decl = f"{name}_fp {name}_table{dims} ="
|
||||
lines.append(decl)
|
||||
recurse(0, [], ' ')
|
||||
lines.append(';')
|
||||
|
||||
return '\n'.join(lines)
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Example usage: each bool has 2 possibilities, some enums have more
|
||||
bitwidths = (
|
||||
1, # [pp_AlphaTest]
|
||||
1, # [entry->params.tsp[two_voume_index].UseAlpha]
|
||||
1, # [entry->params.isp.Texture]
|
||||
1, # [entry->params.isp.Offset]
|
||||
1, # [entry->params.tsp[two_voume_index].ColorClamp]
|
||||
2, # [entry->params.tsp[two_voume_index].FogCtrl]
|
||||
1, # [FPU_SHAD_SCALE.intensity_shadow]
|
||||
)
|
||||
code = generate_table("PixelFlush_tsp", tuple(1 << bw for bw in bitwidths))
|
||||
print(code)
|
||||
|
||||
bitwidths = (
|
||||
1, # [entry->params.tsp[two_voume_index].IgnoreTexA]
|
||||
1, # [entry->params.tsp[two_voume_index].ClampU]
|
||||
1, # [entry->params.tsp[two_voume_index].ClampV]
|
||||
1, # [entry->params.tsp[two_voume_index].FlipU]
|
||||
1, # [entry->params.tsp[two_voume_index].FlipV]
|
||||
2, # [entry->params.tsp[two_voume_index].FilterMode]
|
||||
)
|
||||
code = generate_table("TextureFilter", tuple(1 << bw for bw in bitwidths))
|
||||
print(code)
|
||||
|
||||
bitwidths = (
|
||||
1, # [entry->params.isp.Texture]
|
||||
1, # [entry->params.isp.Offset]
|
||||
2, # [entry->params.tsp[two_voume_index].ShadInstr ]
|
||||
)
|
||||
code = generate_table("ColorCombiner", tuple(1 << bw for bw in bitwidths))
|
||||
print(code)
|
||||
|
||||
bitwidths = (
|
||||
1, # [entry->params.tsp[two_voume_index].SrcSelect]
|
||||
1, # [entry->params.tsp[two_voume_index].DstSelect]
|
||||
3, # [entry->params.tsp[two_voume_index].SrcInstr]
|
||||
3, # [entry->params.tsp[two_voume_index].DstInstr]
|
||||
)
|
||||
code = generate_table("BlendingUnit", tuple(1 << bw for bw in bitwidths))
|
||||
print(code)
|
||||
|
||||
bitwidths = (
|
||||
1, # [entry->params.tcw[two_voume_index].VQ_Comp]
|
||||
1, # [entry->params.tcw[two_voume_index].MipMapped]
|
||||
1, # [entry->params.tcw[two_voume_index].ScanOrder]
|
||||
1, # [entry->params.tcw[two_voume_index].StrideSel]
|
||||
3, # [entry->params.tcw[two_voume_index].PixelFmt]
|
||||
)
|
||||
code = generate_table("TextureFetch", tuple(1 << bw for bw in bitwidths))
|
||||
print(code)
|
||||
169
vendor/emu/refsw/refsw_lists.cpp
vendored
169
vendor/emu/refsw/refsw_lists.cpp
vendored
@@ -81,34 +81,16 @@
|
||||
|
||||
#include "refsw_tile.h"
|
||||
|
||||
#define JLOG(...)
|
||||
#define JLOG2(...)
|
||||
#define V(x) x
|
||||
|
||||
void log_vertex(const Vertex& v) {
|
||||
JLOG(ll,
|
||||
V(v.x), V(v.y), V(v.z),
|
||||
V(v.col[0]), V(v.col[1]), V(v.col[2]), V(v.col[3]), V(v.spc[0]), V(v.spc[1]), V(v.spc[2]), V(v.spc[3]), V(v.u), V(v.v),
|
||||
V(v.col1[0]), V(v.col1[1]), V(v.col1[2]), V(v.col1[3]), V(v.spc1[0]), V(v.spc1[1]), V(v.spc1[2]), V(v.spc1[3]), V(v.u1), V(v.v1)
|
||||
);
|
||||
}
|
||||
|
||||
/*
|
||||
Main renderer class
|
||||
*/
|
||||
|
||||
void RenderTriangle(RenderMode render_mode, DrawParameters* params, parameter_tag_t tag, const Vertex& v1, const Vertex& v2, const Vertex& v3, const Vertex* v4, taRECT* area)
|
||||
{
|
||||
JLOG(ll, V(render_mode), V(tag), "tsp0", params->tsp[0].full, "tcw0", params->tcw[0].full, "tsp1", params->tsp[1].full, "tcw1", params->tcw[1].full);
|
||||
{
|
||||
RasterizeTriangle_table[render_mode](params, tag, v1, v2, v3, v4, area);
|
||||
|
||||
log_vertex(v1);
|
||||
log_vertex(v2);
|
||||
log_vertex(v3);
|
||||
if (v4) {
|
||||
log_vertex(*v4);
|
||||
if (render_mode == RM_TRANSLUCENT_PRESORT) {
|
||||
RenderParamTags<RM_TRANSLUCENT_PRESORT>(area->left, area->top);
|
||||
}
|
||||
|
||||
RasterizeTriangle(render_mode, params, tag, v1, v2, v3, v4, area);
|
||||
|
||||
if (render_mode == RM_MODIFIER)
|
||||
{
|
||||
@@ -138,6 +120,7 @@ u32 ReadRegionArrayEntry(u32 base, RegionArrayEntry* entry)
|
||||
u32 rv;
|
||||
if (fmt_v1)
|
||||
{
|
||||
entry->control.pre_sort = ISP_FEED_CFG.pre_sort;
|
||||
entry->puncht.full = 0x80000000;
|
||||
rv = 5 * 4;
|
||||
}
|
||||
@@ -160,13 +143,13 @@ u32 ReadRegionArrayEntry(u32 base, RegionArrayEntry* entry)
|
||||
}
|
||||
|
||||
ISP_BACKGND_T_type CoreTagFromDesc(u32 cache_bypass, u32 shadow, u32 skip, u32 param_offs_in_words, u32 tag_offset) {
|
||||
ISP_BACKGND_T_type rv {
|
||||
.tag_offset = tag_offset,
|
||||
.param_offs_in_words = param_offs_in_words,
|
||||
.skip = skip,
|
||||
.shadow = shadow,
|
||||
.cache_bypass = cache_bypass
|
||||
};
|
||||
ISP_BACKGND_T_type rv;
|
||||
rv.full = 0;
|
||||
rv.tag_offset = tag_offset;
|
||||
rv.param_offs_in_words = param_offs_in_words;
|
||||
rv.skip = skip;
|
||||
rv.shadow = shadow;
|
||||
rv.cache_bypass = cache_bypass;
|
||||
|
||||
return rv;
|
||||
}
|
||||
@@ -174,8 +157,6 @@ ISP_BACKGND_T_type CoreTagFromDesc(u32 cache_bypass, u32 shadow, u32 skip, u32 p
|
||||
// render a triangle strip object list entry
|
||||
void RenderTriangleStrip(RenderMode render_mode, ObjectListEntry obj, taRECT* rect)
|
||||
{
|
||||
JLOG(ll, V(render_mode), "obj", obj.full);
|
||||
|
||||
Vertex vtx[8];
|
||||
DrawParameters params;
|
||||
|
||||
@@ -204,7 +185,6 @@ void RenderTriangleStrip(RenderMode render_mode, ObjectListEntry obj, taRECT* re
|
||||
// render a triangle array object list entry
|
||||
void RenderTriangleArray(RenderMode render_mode, ObjectListEntry obj, taRECT* rect)
|
||||
{
|
||||
JLOG(ll, V(render_mode), "obj", obj.full);
|
||||
auto triangles = obj.tarray.prims + 1;
|
||||
u32 param_base = PARAM_BASE & 0xF00000;
|
||||
|
||||
@@ -222,8 +202,6 @@ void RenderTriangleArray(RenderMode render_mode, ObjectListEntry obj, taRECT* re
|
||||
|
||||
parameter_tag_t tag = CoreTagFromDesc(params.isp.CacheBypass, obj.tstrip.shadow, obj.tstrip.skip, (tag_address - param_base)/4, 0).full;
|
||||
|
||||
assert(!(tag & TAG_INVALID));
|
||||
|
||||
RenderTriangle(render_mode, ¶ms, tag, vtx[0], vtx[1], vtx[2], nullptr, rect);
|
||||
}
|
||||
}
|
||||
@@ -231,8 +209,6 @@ void RenderTriangleArray(RenderMode render_mode, ObjectListEntry obj, taRECT* re
|
||||
// render a quad array object list entry
|
||||
void RenderQuadArray(RenderMode render_mode, ObjectListEntry obj, taRECT* rect)
|
||||
{
|
||||
JLOG(ll, V(render_mode), "obj", obj.full);
|
||||
|
||||
auto quads = obj.qarray.prims + 1;
|
||||
u32 param_base = PARAM_BASE & 0xF00000;
|
||||
|
||||
@@ -301,22 +277,12 @@ void RenderCORE() {
|
||||
}
|
||||
u32 base = REGION_BASE;
|
||||
|
||||
JLOG(ll, V(REGION_BASE));
|
||||
|
||||
RegionArrayEntry entry;
|
||||
|
||||
// Parse region array
|
||||
do {
|
||||
auto step = ReadRegionArrayEntry(base, &entry);
|
||||
|
||||
JLOG2(llrrae, "ReadRegionArrayEntry", V(base),
|
||||
"control", entry.control.full,
|
||||
"opaque", entry.opaque.full,
|
||||
"opaque_mod", entry.opaque_mod.full,
|
||||
"trans", entry.trans.full,
|
||||
"trans_mod", entry.trans_mod.full,
|
||||
"puncht", entry.puncht.full);
|
||||
|
||||
base += step;
|
||||
|
||||
taRECT rect;
|
||||
@@ -328,6 +294,7 @@ void RenderCORE() {
|
||||
|
||||
parameter_tag_t bgTag;
|
||||
|
||||
ClearFpuCache();
|
||||
// register BGPOLY to fpu
|
||||
{
|
||||
bgTag = ISP_BACKGND_T.full;
|
||||
@@ -338,85 +305,102 @@ void RenderCORE() {
|
||||
{
|
||||
// Clear Param + Z + stencil buffers
|
||||
ClearBuffers(bgTag, ISP_BACKGND_D.f, 0);
|
||||
} else {
|
||||
ClearParamStatusBuffer();
|
||||
}
|
||||
|
||||
// Render OPAQ to TAGS
|
||||
if (!entry.opaque.empty)
|
||||
{
|
||||
JLOG2(llo, "opaque", V(entry.opaque.ptr_in_words));
|
||||
RenderObjectList(RM_OPAQUE, entry.opaque.ptr_in_words * 4, &rect);
|
||||
|
||||
if (!entry.opaque_mod.empty)
|
||||
{
|
||||
RenderObjectList(RM_MODIFIER, entry.opaque_mod.ptr_in_words * 4, &rect);
|
||||
}
|
||||
}
|
||||
|
||||
// Render TAGS to ACCUM
|
||||
RenderParamTags(RM_OPAQUE, rect.left, rect.top);
|
||||
RenderParamTags<RM_OPAQUE>(rect.left, rect.top);
|
||||
|
||||
// render PT to TAGS
|
||||
if (!entry.puncht.empty)
|
||||
{
|
||||
PeelBuffersPTInitial(FLT_MAX);
|
||||
|
||||
ClearMoreToDraw();
|
||||
|
||||
do {
|
||||
// Render to TAGS
|
||||
RenderObjectList(RM_PUNCHTHROUGH_PASS0, entry.puncht.ptr_in_words * 4, &rect);
|
||||
|
||||
// keep reference Z buffer
|
||||
PeelBuffersPT();
|
||||
|
||||
// Render TAGS to ACCUM, making Z holes as-needed
|
||||
RenderParamTags<RM_PUNCHTHROUGH_PASS0>(rect.left, rect.top);
|
||||
|
||||
while (GetMoreToDraw()) {
|
||||
ClearMoreToDraw();
|
||||
|
||||
// Render to TAGS
|
||||
{
|
||||
JLOG2(llo, "puncht", V(entry.puncht.ptr_in_words));
|
||||
RenderObjectList(RM_PUNCHTHROUGH, entry.puncht.ptr_in_words * 4, &rect);
|
||||
}
|
||||
RenderObjectList(RM_PUNCHTHROUGH_PASSN, entry.puncht.ptr_in_words * 4, &rect);
|
||||
|
||||
if (!GetMoreToDraw())
|
||||
break;
|
||||
|
||||
ClearMoreToDraw();
|
||||
// keep reference Z buffer
|
||||
PeelBuffersPT();
|
||||
|
||||
// Render TAGS to ACCUM, making Z holes as-needed
|
||||
RenderParamTags(RM_PUNCHTHROUGH, rect.left, rect.top);
|
||||
|
||||
// Copy TAGB=TAGA buffers, clear TAGA
|
||||
PeelBuffersPTAfterHoles();
|
||||
|
||||
} while (GetMoreToDraw() != 0);
|
||||
}
|
||||
|
||||
|
||||
//TODO: Actually render OPAQ modvol affected pixels
|
||||
if (!entry.opaque_mod.empty)
|
||||
{
|
||||
JLOG2(llo, "opaque_mod", V(entry.opaque_mod.ptr_in_words));
|
||||
RenderObjectList(RM_MODIFIER, entry.opaque_mod.ptr_in_words * 4, &rect);
|
||||
RenderParamTags(RM_OP_PT_MV, rect.left, rect.top);
|
||||
RenderParamTags<RM_PUNCHTHROUGH_PASS0>(rect.left, rect.top);
|
||||
}
|
||||
if (!entry.opaque_mod.empty)
|
||||
{
|
||||
RenderObjectList(RM_MODIFIER, entry.opaque_mod.ptr_in_words * 4, &rect);
|
||||
RenderParamTags<RM_PUNCHTHROUGH_MV>(rect.left, rect.top);
|
||||
}
|
||||
}
|
||||
|
||||
// layer peeling rendering
|
||||
if (!entry.trans.empty)
|
||||
{
|
||||
// clear the param buffer
|
||||
ClearParamBuffer(TAG_INVALID);
|
||||
if (entry.control.pre_sort) {
|
||||
// clear the param buffer
|
||||
ClearParamStatusBuffer();
|
||||
|
||||
do
|
||||
{
|
||||
// prepare for a new pass
|
||||
ClearMoreToDraw();
|
||||
// render to TAGS
|
||||
{
|
||||
RenderObjectList(RM_TRANSLUCENT_PRESORT, entry.trans.ptr_in_words * 4, &rect);
|
||||
}
|
||||
|
||||
// what happens with modvols here?
|
||||
// if (!entry.trans_mod.empty)
|
||||
// {
|
||||
// RenderObjectList(RM_MODIFIER, entry.trans_mod.ptr_in_words * 4, &rect);
|
||||
// }
|
||||
} else {
|
||||
do
|
||||
{
|
||||
// prepare for a new pass
|
||||
ClearMoreToDraw();
|
||||
|
||||
if (!ISP_FEED_CFG.pre_sort) {
|
||||
// copy depth test to depth reference buffer, clear depth test buffer, clear stencil
|
||||
PeelBuffers(FLT_MAX, 0);
|
||||
}
|
||||
|
||||
// render to TAGS
|
||||
{
|
||||
JLOG2(llo, "trans", V(entry.trans.ptr_in_words));
|
||||
RenderObjectList(RM_TRANSLUCENT, entry.trans.ptr_in_words * 4, &rect);
|
||||
}
|
||||
// render to TAGS
|
||||
{
|
||||
RenderObjectList(RM_TRANSLUCENT_AUTOSORT, entry.trans.ptr_in_words * 4, &rect);
|
||||
}
|
||||
|
||||
if (!entry.trans_mod.empty)
|
||||
{
|
||||
JLOG2(llo, "trans_mod", V(entry.trans_mod.ptr_in_words));
|
||||
RenderObjectList(RM_MODIFIER, entry.trans_mod.ptr_in_words * 4, &rect);
|
||||
}
|
||||
if (!entry.trans_mod.empty)
|
||||
{
|
||||
RenderObjectList(RM_MODIFIER, entry.trans_mod.ptr_in_words * 4, &rect);
|
||||
}
|
||||
|
||||
// render TAGS to ACCUM
|
||||
// also marks TAGS as invalid, but keeps the index for coplanar sorting
|
||||
RenderParamTags(RM_TRANSLUCENT, rect.left, rect.top);
|
||||
} while (GetMoreToDraw() != 0);
|
||||
// render TAGS to ACCUM
|
||||
RenderParamTags<RM_TRANSLUCENT_AUTOSORT>(rect.left, rect.top);
|
||||
} while (GetMoreToDraw() != 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy to vram
|
||||
@@ -469,9 +453,6 @@ void RenderCORE() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// clear the tsp cache
|
||||
ClearFpuEntries();
|
||||
} while (!entry.control.last_region);
|
||||
}
|
||||
|
||||
|
||||
18
vendor/emu/refsw/refsw_lists.h
vendored
18
vendor/emu/refsw/refsw_lists.h
vendored
@@ -26,13 +26,23 @@ struct DrawParameters
|
||||
|
||||
enum RenderMode {
|
||||
RM_OPAQUE,
|
||||
RM_PUNCHTHROUGH,
|
||||
RM_OP_PT_MV, // OP and PT with modvol
|
||||
RM_TRANSLUCENT,
|
||||
RM_PUNCHTHROUGH_PASS0,
|
||||
RM_PUNCHTHROUGH_PASSN,
|
||||
RM_PUNCHTHROUGH_MV, // PT MODVOL 2nd pass
|
||||
RM_TRANSLUCENT_AUTOSORT,
|
||||
RM_TRANSLUCENT_PRESORT,
|
||||
RM_MODIFIER,
|
||||
};
|
||||
|
||||
#define TAG_INVALID (1 << 31)
|
||||
struct TagState {
|
||||
union {
|
||||
struct {
|
||||
bool valid: 1;
|
||||
bool rendered: 1;
|
||||
};
|
||||
uint8_t raw;
|
||||
};
|
||||
};
|
||||
|
||||
typedef u32 parameter_tag_t;
|
||||
|
||||
|
||||
669
vendor/emu/refsw/refsw_tile.cpp
vendored
669
vendor/emu/refsw/refsw_tile.cpp
vendored
File diff suppressed because it is too large
Load Diff
50
vendor/emu/refsw/refsw_tile.h
vendored
50
vendor/emu/refsw/refsw_tile.h
vendored
@@ -39,6 +39,10 @@ struct PlaneStepper3
|
||||
|
||||
float C = ((v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y));
|
||||
|
||||
if (C == 0) {
|
||||
C = 1; // avoid divide by zero
|
||||
}
|
||||
|
||||
ddx = -Aa / C;
|
||||
ddy = -Ba / C;
|
||||
|
||||
@@ -54,6 +58,16 @@ struct PlaneStepper3
|
||||
{
|
||||
return Ip(x, y) * W;
|
||||
}
|
||||
|
||||
float IpU8(float x, float y, float W) const
|
||||
{
|
||||
float rv = Ip(x, y, W);
|
||||
|
||||
if (rv < 0) rv = 0;
|
||||
if (rv > 255) rv = 255;
|
||||
|
||||
return rv;
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -87,8 +101,8 @@ struct IPs3
|
||||
}
|
||||
|
||||
if (TwoVolumes) {
|
||||
U[1].Setup(rect, v1, v2, v3, v1.u * v1.z, v2.u1 * v2.z, v3.u1 * v3.z);
|
||||
V[1].Setup(rect, v1, v2, v3, v1.v * v1.z, v2.v1 * v2.z, v3.v1 * v3.z);
|
||||
U[1].Setup(rect, v1, v2, v3, v1.u1 * v1.z, v2.u1 * v2.z, v3.u1 * v3.z);
|
||||
V[1].Setup(rect, v1, v2, v3, v1.v1 * v1.z, v2.v1 * v2.z, v3.v1 * v3.z);
|
||||
if (params->isp.Gouraud) {
|
||||
for (int i = 0; i < 4; i++)
|
||||
Col[1][i].Setup(rect, v1, v2, v3, v1.col1[i] * v1.z, v2.col1[i] * v2.z, v3.col1[i] * v3.z);
|
||||
@@ -128,10 +142,9 @@ extern u32 colorBuffer1 [MAX_RENDER_PIXELS];
|
||||
extern const char* dump_textures;
|
||||
|
||||
void ClearBuffers(u32 paramValue, float depthValue, u32 stencilValue);
|
||||
void ClearParamBuffer(parameter_tag_t paramValue);
|
||||
void ClearParamStatusBuffer();
|
||||
void PeelBuffers(float depthValue, u32 stencilValue);
|
||||
void PeelBuffersPT();
|
||||
void PeelBuffersPTAfterHoles();
|
||||
void PeelBuffersPTInitial(float depthValue);
|
||||
void SummarizeStencilOr();
|
||||
void SummarizeStencilAnd();
|
||||
@@ -140,31 +153,29 @@ bool GetMoreToDraw();
|
||||
|
||||
// Render to ACCUM from TAG buffer
|
||||
// TAG holds references to triangles, ACCUM is the tile framebuffer
|
||||
void RenderParamTags(RenderMode rm, int tileX, int tileY);
|
||||
void ClearFpuEntries();
|
||||
template<RenderMode rm>
|
||||
void RenderParamTags(int tileX, int tileY);
|
||||
|
||||
f32 f16(u16 v);
|
||||
inline __attribute__((always_inline)) f32 f16(u16 v)
|
||||
{
|
||||
u32 z=v<<16;
|
||||
return *(f32*)&z;
|
||||
}
|
||||
|
||||
//decode a vertex in the native pvr format
|
||||
void decode_pvr_vertex(DrawParameters* params, pvr32addr_t ptr,Vertex* cv, u32 shadow);
|
||||
// decode an object (params + vertexes)
|
||||
u32 decode_pvr_vertices(DrawParameters* params, pvr32addr_t base, u32 skip, u32 two_volumes, Vertex* vtx, int count, int offset);
|
||||
|
||||
FpuEntry GetFpuEntry(taRECT *rect, RenderMode render_mode, ISP_BACKGND_T_type core_tag);
|
||||
const FpuEntry& GetFpuEntry(taRECT *rect, RenderMode render_mode, ISP_BACKGND_T_type core_tag);
|
||||
// Lookup/create cached TSP parameters, and call PixelFlush_tsp
|
||||
bool PixelFlush_tsp(bool pp_AlphaTest, FpuEntry* entry, float x, float y, u32 index, float invW, bool InVolume);
|
||||
bool PixelFlush_tsp(bool pp_AlphaTest, const FpuEntry* entry, float x, float y, u32 index, float invW, bool InVolume);
|
||||
// Rasterize a single triangle to ISP (or ISP+TSP for PT)
|
||||
void RasterizeTriangle(RenderMode render_mode, DrawParameters* params, parameter_tag_t tag, const Vertex& v1, const Vertex& v2, const Vertex& v3, const Vertex* v4, taRECT* area);
|
||||
|
||||
extern void (*RasterizeTriangle_table[])(DrawParameters* params, parameter_tag_t tag, const Vertex& v1, const Vertex& v2, const Vertex& v3, const Vertex* v4, taRECT* area);
|
||||
|
||||
u8* GetColorOutputBuffer();
|
||||
|
||||
// Implement the full texture/shade pipeline for a pixel
|
||||
bool PixelFlush_tsp(
|
||||
bool pp_UseAlpha, bool pp_Texture, bool pp_Offset, bool pp_ColorClamp, u32 pp_FogCtrl, bool pp_IgnoreAlpha, bool pp_ClampU, bool pp_ClampV, bool pp_FlipU, bool pp_FlipV, u32 pp_FilterMode, u32 pp_ShadInstr, bool pp_AlphaTest, u32 pp_SrcSel, u32 pp_DstSel, u32 pp_SrcInst, u32 pp_DstInst,
|
||||
const FpuEntry *entry, float x, float y, float W, bool InVolume, u32 index);
|
||||
|
||||
// Depth processing for a pixel -- render_mode 0: OPAQ, 1: PT, 2: TRANS
|
||||
void PixelFlush_isp(RenderMode render_mode, u32 depth_mode, u32 ZWriteDis, float x, float y, float invW, u32 index, parameter_tag_t tag);
|
||||
|
||||
|
||||
/*
|
||||
Main renderer class
|
||||
@@ -180,4 +191,5 @@ void RenderTriangleArray(RenderMode render_mode, ObjectListEntry obj, taRECT* re
|
||||
void RenderQuadArray(RenderMode render_mode, ObjectListEntry obj, taRECT* rect);
|
||||
void RenderObjectList(RenderMode render_mode, pvr32addr_t base, taRECT* rect);
|
||||
void RenderCORE();
|
||||
void Hackpresent();
|
||||
void Hackpresent();
|
||||
void ClearFpuCache();
|
||||
23
vendor/koshle/dc_hle_types.h
vendored
23
vendor/koshle/dc_hle_types.h
vendored
@@ -1,6 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#if ( (__LONG_MAX__ *2UL+1UL) == 18446744073709551615ULL) && ((__INT_MAX__ *2U +1U) == 4294967295ULL)
|
||||
#if defined(_WIN64) && _WIN64 == 1
|
||||
typedef unsigned long long uint64; /**< \brief 64-bit unsigned integer */
|
||||
typedef unsigned int uint32; /**< \brief 32-bit unsigned integer */
|
||||
typedef unsigned short uint16; /**< \brief 16-bit unsigned integer */
|
||||
typedef unsigned char uint8; /**< \brief 8-bit unsigned integer */
|
||||
typedef long long int64; /**< \brief 64-bit signed integer */
|
||||
typedef int int32; /**< \brief 32-bit signed integer */
|
||||
typedef short int16; /**< \brief 16-bit signed integer */
|
||||
typedef char int8; /**< \brief 8-bit signed integer */
|
||||
|
||||
typedef volatile unsigned long long vuint64; /**< \brief 64-bit unsigned integer */
|
||||
typedef volatile unsigned int vuint32; /**< \brief 32-bit unsigned integer */
|
||||
typedef volatile unsigned short vuint16; /**< \brief 16-bit unsigned integer */
|
||||
typedef volatile unsigned char vuint8; /**< \brief 8-bit unsigned integer */
|
||||
typedef volatile long long vint64; /**< \brief 64-bit signed integer */
|
||||
typedef volatile int vint32; /**< \brief 32-bit signed integer */
|
||||
typedef volatile short vint16; /**< \brief 16-bit signed integer */
|
||||
typedef volatile char vint8; /**< \brief 8-bit signed integer */
|
||||
|
||||
typedef uint64 ptr_t;
|
||||
#define INT32_IS_INT
|
||||
#elif (( (__LONG_MAX__ *2UL+1UL) == 18446744073709551615ULL) && ((__INT_MAX__ *2U +1U) == 4294967295ULL))
|
||||
typedef unsigned long uint64; /**< \brief 64-bit unsigned integer */
|
||||
typedef unsigned int uint32; /**< \brief 32-bit unsigned integer */
|
||||
typedef unsigned short uint16; /**< \brief 16-bit unsigned integer */
|
||||
|
||||
3
vendor/koshle/hlekos.cpp
vendored
3
vendor/koshle/hlekos.cpp
vendored
@@ -24,7 +24,7 @@ void * maple_dev_status(maple_device*) {
|
||||
maple_device_t * maple_enum_type(int n, uint32 func) {
|
||||
return &dev;
|
||||
}
|
||||
|
||||
namespace kos {
|
||||
int sem_wait_timed(semaphore_t *sem, int timeout) {
|
||||
auto count = sem->count.load();
|
||||
|
||||
@@ -54,6 +54,7 @@ int sem_signal(semaphore_t *sem) {
|
||||
sem->count++;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void sq_lock(void *dest) {
|
||||
|
||||
|
||||
3
vendor/koshle/kos/sem.h
vendored
3
vendor/koshle/kos/sem.h
vendored
@@ -32,6 +32,7 @@
|
||||
|
||||
\headerfile kos/sem.h
|
||||
*/
|
||||
namespace kos {
|
||||
typedef struct semaphore {
|
||||
std::atomic<int> initialized; /**< \brief Are we initialized? */
|
||||
std::atomic<int> count; /**< \brief The semaphore count */
|
||||
@@ -177,5 +178,7 @@ int sem_signal(semaphore_t *sem);
|
||||
*/
|
||||
int sem_count(semaphore_t *sem);
|
||||
|
||||
}
|
||||
using namespace kos;
|
||||
|
||||
#endif /* __KOS_SEM_H */
|
||||
|
||||
2
vendor/koshle/pvr_internal.h
vendored
2
vendor/koshle/pvr_internal.h
vendored
@@ -233,7 +233,7 @@ typedef struct {
|
||||
|
||||
/* Wait-ready semaphore: this will be signaled whenever the pvr_wait_ready()
|
||||
call should be ready to return. */
|
||||
semaphore_t ready_sem;
|
||||
kos::semaphore_t ready_sem;
|
||||
|
||||
// Handle for the vblank interrupt
|
||||
int vbl_handle;
|
||||
|
||||
8
vendor/librw/src/base.cpp
vendored
8
vendor/librw/src/base.cpp
vendored
@@ -89,10 +89,18 @@ strncmp_ci(const char *s1, const char *s2, int n)
|
||||
Quat
|
||||
mult(const Quat &q, const Quat &p)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
return makeQuat(q.w*p.w - q.x*p.x - q.y*p.y - q.z*p.z,
|
||||
q.w*p.x + q.x*p.w + q.y*p.z - q.z*p.y,
|
||||
q.w*p.y + q.y*p.w + q.z*p.x - q.x*p.z,
|
||||
q.w*p.z + q.z*p.w + q.x*p.y - q.y*p.x);
|
||||
#else
|
||||
Quat o;
|
||||
dc::quat_mult(reinterpret_cast<dc::quaternion_t *>(&o),
|
||||
reinterpret_cast<const dc::quaternion_t &>(q),
|
||||
reinterpret_cast<const dc::quaternion_t &>(p));
|
||||
return o;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
206
vendor/librw/src/camera.cpp
vendored
206
vendor/librw/src/camera.cpp
vendored
@@ -429,6 +429,7 @@ Camera::frustumTestSphere(const Sphere *s) const
|
||||
{
|
||||
int32 res = SPHEREINSIDE;
|
||||
const FrustumPlane *p = this->frustumPlanes;
|
||||
#ifndef DC_SH4
|
||||
for(int32 i = 0; i < 6; i++){
|
||||
float32 dist = dot(p->plane.normal, s->center) - p->plane.distance;
|
||||
if(s->radius < dist)
|
||||
@@ -437,6 +438,114 @@ Camera::frustumTestSphere(const Sphere *s) const
|
||||
res = SPHEREBOUNDARY;
|
||||
p++;
|
||||
}
|
||||
#else
|
||||
__builtin_prefetch(p);
|
||||
|
||||
register float sx asm("fr0") = s->center.x;
|
||||
register float sy asm("fr1") = s->center.y;
|
||||
register float sz asm("fr2") = s->center.z;
|
||||
register float sw asm("fr3") = -1.0f;
|
||||
|
||||
// far
|
||||
register float px asm("fr4") = p->plane.normal.x;
|
||||
register float py asm("fr5") = p->plane.normal.y;
|
||||
register float pz asm("fr6") = p->plane.normal.z;
|
||||
register float pw asm("fr7") = p->plane.distance;
|
||||
|
||||
asm volatile("fipr fv0, fv4"
|
||||
: "+f" (pw)
|
||||
: "f" (sx), "f" (sy), "f" (sz), "f" (sw),
|
||||
"f" (px), "f" (py), "f" (pz));
|
||||
if(s->radius < pw)
|
||||
return SPHEREOUTSIDE;
|
||||
else if(s->radius > -pw)
|
||||
res = SPHEREBOUNDARY;
|
||||
p++;
|
||||
|
||||
// near
|
||||
px = p->plane.normal.x;
|
||||
py = p->plane.normal.y;
|
||||
pz = p->plane.normal.z;
|
||||
pw = p->plane.distance;
|
||||
asm volatile("fipr fv0, fv4"
|
||||
: "+f" (pw)
|
||||
: "f" (sx), "f" (sy), "f" (sz), "f" (sw),
|
||||
"f" (px), "f" (py), "f" (pz));
|
||||
if(s->radius < pw)
|
||||
return SPHEREOUTSIDE;
|
||||
if(s->radius > -pw)
|
||||
res = SPHEREBOUNDARY_NEAR;
|
||||
p++;
|
||||
|
||||
const float* base_ptr0 = &p[0].plane.normal.x;
|
||||
const float* base_ptr1 = &p[1].plane.normal.x;
|
||||
const float* base_ptr2 = &p[2].plane.normal.x;
|
||||
const float* base_ptr3 = &p[3].plane.normal.x;
|
||||
|
||||
__builtin_prefetch(base_ptr0);
|
||||
|
||||
static_assert(offsetof (decltype (p[0].plane.normal), y)
|
||||
-offsetof (decltype (p[0].plane.normal), x) == sizeof (float));
|
||||
|
||||
static_assert(offsetof (decltype (p[0].plane.normal), z)
|
||||
-offsetof (decltype (p[0].plane.normal), y) == sizeof (float));
|
||||
|
||||
static_assert(offsetof (decltype (p[0].plane), distance)
|
||||
-offsetof (decltype (p[0].plane.normal), z) == sizeof (float));
|
||||
|
||||
asm volatile (R"(
|
||||
frchg
|
||||
|
||||
fmov.s @%0+,fr0
|
||||
fmov.s @%1+,fr1
|
||||
fmov.s @%2+,fr2
|
||||
fmov.s @%3+,fr3
|
||||
|
||||
fmov.s @%0+,fr4
|
||||
fmov.s @%1+,fr5
|
||||
fmov.s @%2+,fr6
|
||||
fmov.s @%3+,fr7
|
||||
|
||||
fmov.s @%0+,fr8
|
||||
fmov.s @%1+,fr9
|
||||
fmov.s @%2+,fr10
|
||||
fmov.s @%3+,fr11
|
||||
|
||||
fmov.s @%0,fr12
|
||||
fmov.s @%1,fr13
|
||||
fmov.s @%2,fr14
|
||||
fmov.s @%3,fr15
|
||||
|
||||
frchg
|
||||
)" : "+&r" (base_ptr0), "+&r" (base_ptr1), "+&r" (base_ptr2), "+&r" (base_ptr3)
|
||||
:
|
||||
: );
|
||||
|
||||
float dists[4];
|
||||
mat_trans_vec4_nodiv_nomod(sx, sy, sz, sw,
|
||||
dists[0], dists[1], dists[2], dists[3]);
|
||||
|
||||
if(s->radius < dists[0])
|
||||
return SPHEREOUTSIDE;
|
||||
else if(s->radius > -dists[0])
|
||||
res = SPHEREBOUNDARY;
|
||||
|
||||
if(s->radius < dists[1])
|
||||
return SPHEREOUTSIDE;
|
||||
else if(s->radius > -dists[1])
|
||||
res = SPHEREBOUNDARY;
|
||||
|
||||
if(s->radius < dists[2])
|
||||
return SPHEREOUTSIDE;
|
||||
else if(s->radius > -dists[2])
|
||||
res = SPHEREBOUNDARY;
|
||||
|
||||
if(s->radius < dists[3])
|
||||
return SPHEREOUTSIDE;
|
||||
else if(s->radius > -dists[3])
|
||||
res = SPHEREBOUNDARY;
|
||||
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -445,7 +554,7 @@ Camera::frustumTestSphereNear(const Sphere *s) const
|
||||
{
|
||||
int32 res = SPHEREINSIDE;
|
||||
const FrustumPlane *p = this->frustumPlanes;
|
||||
|
||||
#ifndef DC_SH4
|
||||
// far
|
||||
float32 dist = dot(p->plane.normal, s->center) - p->plane.distance;
|
||||
if(s->radius < dist)
|
||||
@@ -481,6 +590,101 @@ Camera::frustumTestSphereNear(const Sphere *s) const
|
||||
return SPHEREOUTSIDE;
|
||||
p++;
|
||||
|
||||
#else
|
||||
__builtin_prefetch(p);
|
||||
|
||||
register float sx asm("fr0") = s->center.x;
|
||||
register float sy asm("fr1") = s->center.y;
|
||||
register float sz asm("fr2") = s->center.z;
|
||||
register float sw asm("fr3") = -1.0f;
|
||||
|
||||
// far
|
||||
register float px asm("fr4") = p->plane.normal.x;
|
||||
register float py asm("fr5") = p->plane.normal.y;
|
||||
register float pz asm("fr6") = p->plane.normal.z;
|
||||
register float pw asm("fr7") = p->plane.distance;
|
||||
|
||||
asm volatile("fipr fv0, fv4"
|
||||
: "+f" (pw)
|
||||
: "f" (sx), "f" (sy), "f" (sz), "f" (sw),
|
||||
"f" (px), "f" (py), "f" (pz));
|
||||
if(s->radius < pw)
|
||||
return SPHEREOUTSIDE;
|
||||
p++;
|
||||
|
||||
// near
|
||||
px = p->plane.normal.x;
|
||||
py = p->plane.normal.y;
|
||||
pz = p->plane.normal.z;
|
||||
pw = p->plane.distance;
|
||||
asm volatile("fipr fv0, fv4"
|
||||
: "+f" (pw)
|
||||
: "f" (sx), "f" (sy), "f" (sz), "f" (sw),
|
||||
"f" (px), "f" (py), "f" (pz));
|
||||
if(s->radius < pw)
|
||||
return SPHEREOUTSIDE;
|
||||
if(s->radius > -pw)
|
||||
res = SPHEREBOUNDARY_NEAR;
|
||||
p++;
|
||||
|
||||
const float* base_ptr0 = &p[0].plane.normal.x;
|
||||
const float* base_ptr1 = &p[1].plane.normal.x;
|
||||
const float* base_ptr2 = &p[2].plane.normal.x;
|
||||
const float* base_ptr3 = &p[3].plane.normal.x;
|
||||
|
||||
__builtin_prefetch(base_ptr0);
|
||||
|
||||
static_assert(offsetof (decltype (p[0].plane.normal), y)
|
||||
-offsetof (decltype (p[0].plane.normal), x) == sizeof (float));
|
||||
|
||||
static_assert(offsetof (decltype (p[0].plane.normal), z)
|
||||
-offsetof (decltype (p[0].plane.normal), y) == sizeof (float));
|
||||
|
||||
static_assert(offsetof (decltype (p[0].plane), distance)
|
||||
-offsetof (decltype (p[0].plane.normal), z) == sizeof (float));
|
||||
|
||||
asm volatile (R"(
|
||||
frchg
|
||||
|
||||
fmov.s @%0+,fr0
|
||||
fmov.s @%1+,fr1
|
||||
fmov.s @%2+,fr2
|
||||
fmov.s @%3+,fr3
|
||||
|
||||
fmov.s @%0+,fr4
|
||||
fmov.s @%1+,fr5
|
||||
fmov.s @%2+,fr6
|
||||
fmov.s @%3+,fr7
|
||||
|
||||
fmov.s @%0+,fr8
|
||||
fmov.s @%1+,fr9
|
||||
fmov.s @%2+,fr10
|
||||
fmov.s @%3+,fr11
|
||||
|
||||
fmov.s @%0,fr12
|
||||
fmov.s @%1,fr13
|
||||
fmov.s @%2,fr14
|
||||
fmov.s @%3,fr15
|
||||
|
||||
frchg
|
||||
)" : "+&r" (base_ptr0), "+&r" (base_ptr1), "+&r" (base_ptr2), "+&r" (base_ptr3)
|
||||
:
|
||||
: );
|
||||
|
||||
float dists[4];
|
||||
mat_trans_vec4_nodiv_nomod(sx, sy, sz, sw,
|
||||
dists[0], dists[1], dists[2], dists[3]);
|
||||
|
||||
if(s->radius < dists[0])
|
||||
return SPHEREOUTSIDE;
|
||||
else if(s->radius < dists[1])
|
||||
return SPHEREOUTSIDE;
|
||||
else if(s->radius < dists[2])
|
||||
return SPHEREOUTSIDE;
|
||||
else if(s->radius < dists[3])
|
||||
return SPHEREOUTSIDE;
|
||||
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
247
vendor/librw/src/dc/rwdc.cpp
vendored
247
vendor/librw/src/dc/rwdc.cpp
vendored
@@ -37,8 +37,6 @@ extern const char* currentFile;
|
||||
#include <functional>
|
||||
#include <fstream>
|
||||
|
||||
#define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0]))
|
||||
|
||||
#define errorf(...) dbglog(DBG_CRITICAL, __VA_ARGS__)
|
||||
#define logf(...) // printf(__VA_ARGS__)
|
||||
bool re3RemoveLeastUsedModel();
|
||||
@@ -47,21 +45,13 @@ void* re3StreamingAlloc(size_t size);
|
||||
|
||||
// #include "rwdcimpl.h"
|
||||
|
||||
#include <dc/pvr.h>
|
||||
#include <dc/matrix.h>
|
||||
#include "alloc.h"
|
||||
|
||||
#undef PVR_TXRFMT_STRIDE
|
||||
#define PVR_TXRFMT_STRIDE (1 << 25)
|
||||
|
||||
static_assert(PVR_TXRFMT_STRIDE == (1 << 25), "PVR_TXRFMT_STRIDE is bugged in your KOS version");
|
||||
using namespace dc;
|
||||
|
||||
// TODO: probably needs a better place to be
|
||||
bool doEnvironmentMaps = true;
|
||||
|
||||
#define fclamp0_1(n) ((n) > 1.0f ? 1.0f : n < 0.0f ? 0.0f : n)
|
||||
#define fclamp1(n) ((n) > 1.0f ? 1.0f : n)
|
||||
|
||||
struct alignas(32) pvr_vertex16_t {
|
||||
uint32_t flags; /**< \brief TA command (vertex flags) */
|
||||
float x; /**< \brief X coordinate */
|
||||
@@ -170,177 +160,10 @@ struct alignas(32) pvr_vertex32_ut {
|
||||
static_assert(sizeof(pvr_vertex16_t) == 32, "pvr_vertex16_t size mismatch");
|
||||
static_assert(alignof(pvr_vertex16_t) == 32, "pvr_vertex16_t alignof mismatch");
|
||||
|
||||
|
||||
#define MATH_Fast_Invert(x) ({ (((x) < 0.0f)? -1.0f : 1.0f) * frsqrt((x) * (x)); })
|
||||
|
||||
static pvr_dr_state_t drState;
|
||||
|
||||
#include <kos/dbglog.h>
|
||||
|
||||
float VIDEO_MODE_SCALE_X;
|
||||
|
||||
#if !defined(DC_TEXCONV) && !defined(DC_SIM)
|
||||
#include <kos.h>
|
||||
|
||||
#define mat_trans_nodiv_nomod(x, y, z, x2, y2, z2, w2) do { \
|
||||
register float __x __asm__("fr12") = (x); \
|
||||
register float __y __asm__("fr13") = (y); \
|
||||
register float __z __asm__("fr14") = (z); \
|
||||
register float __w __asm__("fr15") = 1.0f; \
|
||||
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
|
||||
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_nodiv_nomod_zerow(x, y, z, x2, y2, z2, w2) do { \
|
||||
register float __x __asm__("fr12") = (x); \
|
||||
register float __y __asm__("fr13") = (y); \
|
||||
register float __z __asm__("fr14") = (z); \
|
||||
register float __w __asm__("fr15") = 0.0f; \
|
||||
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
|
||||
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_w_nodiv_nomod(x, y, z, w) do { \
|
||||
register float __x __asm__("fr12") = (x); \
|
||||
register float __y __asm__("fr13") = (y); \
|
||||
register float __z __asm__("fr14") = (z); \
|
||||
register float __w __asm__("fr15") = 1.0f; \
|
||||
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
|
||||
w = __w; \
|
||||
} while(false)
|
||||
|
||||
// no declspec naked, so can't do rts / fschg. instead compiler pads with nop?
|
||||
|
||||
inline void rw_mat_load_3x3(const rw::Matrix* mtx) {
|
||||
__asm__ __volatile__ (
|
||||
R"(
|
||||
fschg
|
||||
frchg
|
||||
|
||||
fmov @%[mtx]+, dr0
|
||||
|
||||
fldi0 fr12
|
||||
fldi0 fr13
|
||||
|
||||
fmov @%[mtx]+, dr2
|
||||
fmov @%[mtx]+, dr4
|
||||
fmov @%[mtx]+, dr6
|
||||
fmov @%[mtx]+, dr8
|
||||
fmov @%[mtx]+, dr10
|
||||
|
||||
fldi0 fr3
|
||||
fldi0 fr7
|
||||
fldi0 fr11
|
||||
fmov dr12, dr14
|
||||
|
||||
fschg
|
||||
frchg
|
||||
)"
|
||||
: [mtx] "+r" (mtx)
|
||||
);
|
||||
}
|
||||
|
||||
// sets pos.w to 1
|
||||
inline void rw_mat_load_4x4(const rw::Matrix* mtx) {
|
||||
__asm__ __volatile__ (
|
||||
R"(
|
||||
fschg
|
||||
frchg
|
||||
fmov @%[mtx]+, dr0
|
||||
|
||||
fmov @%[mtx]+, dr2
|
||||
fmov @%[mtx]+, dr4
|
||||
fmov @%[mtx]+, dr6
|
||||
fmov @%[mtx]+, dr8
|
||||
fmov @%[mtx]+, dr10
|
||||
fmov @%[mtx]+, dr12
|
||||
fmov @%[mtx]+, dr14
|
||||
fldi1 fr15
|
||||
|
||||
fschg
|
||||
frchg
|
||||
)"
|
||||
: [mtx] "+r" (mtx)
|
||||
);
|
||||
}
|
||||
|
||||
#else
|
||||
extern matrix_t XMTRX;
|
||||
|
||||
void rw_mat_load_3x3(rw::Matrix* mtx) {
|
||||
memcpy(XMTRX, mtx, sizeof(matrix_t));
|
||||
XMTRX[0][3] = 0.0f;
|
||||
XMTRX[1][3] = 0.0f;
|
||||
XMTRX[2][3] = 0.0f;
|
||||
|
||||
XMTRX[3][0] = 0.0f;
|
||||
XMTRX[3][1] = 0.0f;
|
||||
XMTRX[3][2] = 0.0f;
|
||||
XMTRX[3][3] = 0.0f;
|
||||
}
|
||||
|
||||
void rw_mat_load_4x4(rw::Matrix* mtx) {
|
||||
memcpy(XMTRX, mtx, sizeof(matrix_t));
|
||||
XMTRX[3][3] = 1.0f;
|
||||
}
|
||||
|
||||
#include <dc/matrix.h>
|
||||
#define frsqrt(a) (1.0f/sqrt(a))
|
||||
#define dcache_pref_block(a) __builtin_prefetch(a)
|
||||
|
||||
#ifndef __always_inline
|
||||
#define __always_inline __attribute__((always_inline)) inline
|
||||
#endif
|
||||
|
||||
#ifdef DC_TEXCONV
|
||||
#define mat_transform(a, b, c, d)
|
||||
#define mat_apply(a)
|
||||
#define mat_load(a)
|
||||
#define mat_store(a)
|
||||
#define mat_identity(a)
|
||||
#define pvr_fog_table_color(a,r,g,b)
|
||||
#define pvr_fog_table_linear(s,e)
|
||||
#define pvr_fog_table_exp(d)
|
||||
#define pvr_fog_table_custom(d)
|
||||
#endif
|
||||
|
||||
#define mat_trans_single3_nomod(x_, y_, z_, x2, y2, z2) do { \
|
||||
vector_t tmp = { x_, y_, z_, 1.0f }; \
|
||||
mat_transform(&tmp, &tmp, 1, 0); \
|
||||
z2 = 1.0f / tmp.w; \
|
||||
x2 = tmp.x * z2; \
|
||||
y2 = tmp.y * z2; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_nodiv_nomod(x_, y_, z_, x2, y2, z2, w2) do { \
|
||||
vector_t tmp1233123 = { x_, y_, z_, 1.0f }; \
|
||||
mat_transform(&tmp1233123, &tmp1233123, 1, 0); \
|
||||
x2 = tmp1233123.x; y2 = tmp1233123.y; z2 = tmp1233123.z; w2 = tmp1233123.w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_nodiv_nomod_zerow(x_, y_, z_, x2, y2, z2, w2) do { \
|
||||
vector_t tmp1233123 = { x_, y_, z_, 0.0f }; \
|
||||
mat_transform(&tmp1233123, &tmp1233123, 1, 0); \
|
||||
x2 = tmp1233123.x; y2 = tmp1233123.y; z2 = tmp1233123.z; w2 = tmp1233123.w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_w_nodiv_nomod(x_, y_, z_, w_) do { \
|
||||
vector_t tmp1233123 = { x_, y_, z_, 1.0f }; \
|
||||
mat_transform(&tmp1233123, &tmp1233123, 1, 0); \
|
||||
w_ = tmp1233123.w; \
|
||||
} while(false)
|
||||
|
||||
#define memcpy4 memcpy
|
||||
|
||||
// END STUBS
|
||||
#endif
|
||||
|
||||
static pvr_ptr_t fake_tex;
|
||||
|
||||
alignas(4) static const uint16_t fake_tex_data[] = {
|
||||
@@ -551,9 +374,9 @@ void DCE_MatrixViewport(float x, float y, float width, float height) {
|
||||
|
||||
void DCE_InitMatrices() {
|
||||
// Setup the screenview matrix. Only need to do once since this matrix does not need to change for single player viewpoint.
|
||||
mat_identity();
|
||||
mat_identity2();
|
||||
|
||||
mat_store(&DCE_MAT_SCREENVIEW);
|
||||
mat_store2(&DCE_MAT_SCREENVIEW);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -684,7 +507,7 @@ struct atomic_context_t {
|
||||
__always_inline void DCE_RenderSubmitVertex(const pvr_vertex_t *v, uint32_t flags) {
|
||||
auto *sq = reinterpret_cast<uint32_t *>(pvr_dr_target(drState));
|
||||
auto *src = reinterpret_cast<const uint32_t *>(v);
|
||||
float sz = MATH_Fast_Invert(v->z);
|
||||
float sz = Invert<true, false>(v->z);
|
||||
float sx = v->x * sz;
|
||||
float sy = v->y * sz;
|
||||
|
||||
@@ -711,7 +534,7 @@ __always_inline void DCE_RenderSubmitVertexIM3D(float x, float y, float w,
|
||||
{
|
||||
auto *sq = reinterpret_cast<uint32_t *>(pvr_dr_target(drState));
|
||||
auto *uv32 = reinterpret_cast<const uint32_t *>(uv);
|
||||
float sz = MATH_Fast_Invert(w);
|
||||
float sz = Invert<true, false>(w);
|
||||
float sx = x * sz;
|
||||
float sy = y * sz;
|
||||
|
||||
@@ -728,13 +551,6 @@ __always_inline void DCE_RenderSubmitVertexIM3D(float x, float y, float w,
|
||||
|
||||
/* END TA Submission Functions*/
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined(DC_TEXCONV)
|
||||
void malloc_stats() { }
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#define UNIMPL_LOG() printf("TODO: Implement %s @ %s:%d\n", __func__, __FILE__, __LINE__);
|
||||
#define UNIMPL_LOGV(fmt, ...) printf("TODO: Implement %s @ %s:%d " fmt "\n", __func__, __FILE__, __LINE__, __VA_ARGS__);
|
||||
@@ -808,9 +624,8 @@ void beginUpdate(Camera* cam) {
|
||||
|
||||
DCE_MatrixViewport(0, 0, cam->frameBuffer->width * VIDEO_MODE_SCALE_X, cam->frameBuffer->height);
|
||||
|
||||
mat_load((matrix_t*)&DCE_MAT_SCREENVIEW);
|
||||
mat_apply((matrix_t*)&cam->devProj);
|
||||
mat_store((matrix_t*)&cam->devProjScreen);
|
||||
mat_load_apply((matrix_t*)&DCE_MAT_SCREENVIEW, (matrix_t*)&cam->devProj);
|
||||
mat_store2((matrix_t*)&cam->devProjScreen);
|
||||
|
||||
}
|
||||
|
||||
@@ -1838,7 +1653,7 @@ void im2DRenderPrimitive(PrimitiveType primType, void *vertices, int32_t numVert
|
||||
pvrVert->flags = flags;
|
||||
pvrVert->x = gtaVert.x * VIDEO_MODE_SCALE_X;
|
||||
pvrVert->y = gtaVert.y;
|
||||
pvrVert->z = MATH_Fast_Invert(gtaVert.w); // this is perfect for almost every case...
|
||||
pvrVert->z = Invert<true, false>(gtaVert.w); // this is perfect for almost every case...
|
||||
pvrVert->u = gtaVert.u;
|
||||
pvrVert->v = gtaVert.v;
|
||||
pvrVert->argb = (gtaVert.a << 24) |
|
||||
@@ -1959,7 +1774,7 @@ void im2DRenderIndexedPrimitive(PrimitiveType primType, void *vertices, int32 nu
|
||||
pvrVert->flags = flags;
|
||||
pvrVert->x = gtaVert.x * VIDEO_MODE_SCALE_X;
|
||||
pvrVert->y = gtaVert.y;
|
||||
pvrVert->z = MATH_Fast_Invert(gtaVert.w); // this is perfect for almost every case...
|
||||
pvrVert->z = Invert<true, false>(gtaVert.w); // this is perfect for almost every case...
|
||||
pvrVert->u = gtaVert.u;
|
||||
pvrVert->v = gtaVert.v;
|
||||
pvrVert->argb = (gtaVert.a << 24) |
|
||||
@@ -2022,8 +1837,8 @@ void im3DTransform(void *vertices, int32 numVertices, Matrix *worldMat, uint32 f
|
||||
rw::RawMatrix::mult(&worldview, &world, &cam->devView);
|
||||
rw::RawMatrix::mult(&proj, &worldview, &cam->devProj);
|
||||
rw::RawMatrix::mult(&mtx, &proj, (RawMatrix*)&DCE_MAT_SCREENVIEW);
|
||||
// mat_load(&DCE_MAT_SCREENVIEW); // ~11 cycles.
|
||||
mat_load(( matrix_t*)&mtx.right); // Number of cycles: ~32.
|
||||
// mat_load2(&DCE_MAT_SCREENVIEW); // ~11 cycles.
|
||||
mat_load2(( matrix_t*)&mtx.right); // Number of cycles: ~32.
|
||||
if (im3dVertices) {
|
||||
free(im3dVertices);
|
||||
}
|
||||
@@ -2117,7 +1932,7 @@ void im3DRenderIndexedPrimitive(PrimitiveType primType,
|
||||
|
||||
// assuming near plane is 0.0f
|
||||
// gv1 is visible (posi), and gv2 is behind the plane (negative)
|
||||
float t = (1.0f - gv1.position.z) * MATH_Fast_Invert(gv2.position.z - gv1.position.z);
|
||||
float t = (1.0f - gv1.position.z) * Invert<true, true>(gv2.position.z - gv1.position.z);
|
||||
|
||||
pvr_vertex_t pvrVert;
|
||||
|
||||
@@ -3395,13 +3210,13 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
|
||||
auto skinningWeightData = (uint8_t*)skinWeights;
|
||||
|
||||
if (!matrix0Identity) {
|
||||
rw_mat_load_4x4(&skinMatrices[0]);
|
||||
mat_load_4x4(&skinMatrices[0]);
|
||||
if (small_xyz) {
|
||||
mat_apply(&DCE_MESHLET_MAT_DECODE);
|
||||
}
|
||||
} else {
|
||||
if (small_xyz) {
|
||||
mat_load(&DCE_MESHLET_MAT_DECODE);
|
||||
mat_load2(&DCE_MESHLET_MAT_DECODE);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3465,7 +3280,7 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
|
||||
break;
|
||||
}
|
||||
|
||||
rw_mat_load_4x4(currentMatrix);
|
||||
mat_load_4x4(currentMatrix);
|
||||
if (small_xyz){
|
||||
mat_apply(&DCE_MESHLET_MAT_DECODE);
|
||||
}
|
||||
@@ -3498,7 +3313,7 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
|
||||
auto skinningWeightData = (uint8_t*)skinWeights;
|
||||
|
||||
if (!matrix0Identity) {
|
||||
rw_mat_load_3x3(&skinMatrices[0]);
|
||||
mat_load_3x3(&skinMatrices[0]);
|
||||
}
|
||||
|
||||
for(;;) {
|
||||
@@ -3556,7 +3371,7 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
|
||||
break;
|
||||
}
|
||||
|
||||
rw_mat_load_3x3(currentMatrix);
|
||||
mat_load_3x3(currentMatrix);
|
||||
|
||||
do {
|
||||
auto srcOffset = *skinningIndexData++;
|
||||
@@ -3581,7 +3396,7 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
|
||||
__attribute__((noinline))
|
||||
void tnlMeshletEnvMap(uint8_t* OCR, uint8_t* normal, int vertexCount, int vertexSize, matrix_t* matfxMatrix, float matfxCoefficient) {
|
||||
|
||||
mat_load(matfxMatrix);
|
||||
mat_load2(matfxMatrix);
|
||||
|
||||
do {
|
||||
pvr_vertex64_t* v = (pvr_vertex64_t*)OCR;
|
||||
@@ -3998,15 +3813,12 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
|
||||
|
||||
lightingCB(atomic, ac->uniform);
|
||||
|
||||
|
||||
rw::RawMatrix world;
|
||||
rw::convMatrix(&world, atomic->getFrame()->getLTM());
|
||||
|
||||
|
||||
mat_load((matrix_t*)&cam->devProjScreen);
|
||||
mat_apply((matrix_t*)&cam->devView);
|
||||
mat_load_apply((matrix_t*)&cam->devProjScreen, (matrix_t*)&cam->devView);
|
||||
mat_apply((matrix_t*)&world);
|
||||
mat_store((matrix_t*)&atomicContexts.back().mtx);
|
||||
mat_store2((matrix_t*)&atomicContexts.back().mtx);
|
||||
|
||||
auto meshes = geo->meshHeader->getMeshes();
|
||||
|
||||
@@ -4245,15 +4057,14 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
|
||||
unsigned skinSelector = small_xyz + acp->skinMatrix0Identity*2;
|
||||
tnlMeshletSkinVerticesSelector[skinSelector](OCR_SPACE, normalDst, &dcModel->data[meshlet->vertexOffset], normalSrc, &dcModel->data[meshlet->skinWeightOffset], &dcModel->data[meshlet->skinIndexOffset], meshlet->vertexCount, meshlet->vertexSize, &acp->skinContextPointer->mtx);
|
||||
|
||||
mat_load(&mtx);
|
||||
mat_load2(&mtx);
|
||||
tnlMeshletTransformSelector[clippingRequired * 2](OCR_SPACE, OCR_SPACE + 4, meshlet->vertexCount, 64);
|
||||
} else {
|
||||
|
||||
if (selector & 8) {
|
||||
mat_load(&mtx);
|
||||
mat_apply(&DCE_MESHLET_MAT_DECODE);
|
||||
mat_load_apply(&mtx, &DCE_MESHLET_MAT_DECODE);
|
||||
} else {
|
||||
mat_load(&mtx);
|
||||
mat_load2(&mtx);
|
||||
}
|
||||
tnlMeshletTransformSelector[smallSelector](OCR_SPACE, &dcModel->data[meshlet->vertexOffset], meshlet->vertexCount, meshlet->vertexSize);
|
||||
}
|
||||
@@ -4280,7 +4091,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
|
||||
|
||||
unsigned dstColOffset = textured ? offsetof(pvr_vertex64_t, a) : offsetof(pvr_vertex32_ut, a);
|
||||
dce_set_mat_vertex_color(&residual, &material);
|
||||
mat_load(&DCE_MESHLET_MAT_VERTEX_COLOR);
|
||||
mat_load2(&DCE_MESHLET_MAT_VERTEX_COLOR);
|
||||
tnlMeshletVertexColorSelector[0](OCR_SPACE + dstColOffset, (int8_t*)&dcModel->data[meshlet->vertexOffset] + colOffset, meshlet->vertexCount, meshlet->vertexSize);
|
||||
} else {
|
||||
unsigned dstColOffset = textured ? offsetof(pvr_vertex64_t, a) : offsetof(pvr_vertex32_ut, a);
|
||||
@@ -4302,7 +4113,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
|
||||
|
||||
|
||||
unsigned normalSelector = (pass1 - 1) + (skin != 0) * 4;
|
||||
mat_load((matrix_t*)&uniformObject.dir[0][0][0]);
|
||||
mat_load2((matrix_t*)&uniformObject.dir[0][0][0]);
|
||||
auto normalPointer = &dcModel->data[meshlet->vertexOffset] + normalOffset;
|
||||
auto vtxSize = meshlet->vertexSize;
|
||||
if (skin) {
|
||||
@@ -4317,7 +4128,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
|
||||
|
||||
if (pass2) {
|
||||
unsigned normalSelector = (pass2 - 1) + (skin != 0) * 4;
|
||||
mat_load((matrix_t*)&uniformObject.dir[1][0][0]);
|
||||
mat_load2((matrix_t*)&uniformObject.dir[1][0][0]);
|
||||
tnlMeshletDiffuseColorSelector[normalSelector](OCR_SPACE + dstColOffset, normalPointer, meshlet->vertexCount, vtxSize, &lightDiffuseColors[4]);
|
||||
}
|
||||
}
|
||||
@@ -4380,7 +4191,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
|
||||
indices.back() |= 0x80;
|
||||
|
||||
pvr_vertex64_t *vd = (pvr_vertex64_t *)OCR_SPACE;
|
||||
mat_load(&mtx); // Number of cycles: ~11
|
||||
mat_load2(&mtx); // Number of cycles: ~11
|
||||
|
||||
for (int idx = 0; idx < geo->numVertices; idx++) {
|
||||
auto& vert = vertices[idx];
|
||||
@@ -4716,6 +4527,10 @@ imageFindRasterFormat(Image *img, int32 type,
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#include <windows.h> // For QueryPerformanceCounter and GetCurrentThreadId
|
||||
#ifdef _WIN64
|
||||
#include <profileapi.h>
|
||||
#include <processthreadsapi.h>
|
||||
#endif
|
||||
#elif defined(__APPLE__)
|
||||
#include <mach/mach_time.h> // For mach_absolute_time
|
||||
#include <pthread.h>
|
||||
|
||||
739
vendor/librw/src/dc/rwdc_common.h
vendored
Normal file
739
vendor/librw/src/dc/rwdc_common.h
vendored
Normal file
@@ -0,0 +1,739 @@
|
||||
#ifndef __RWDC_COMMON_H
|
||||
#define __RWDC_COMMON_H
|
||||
|
||||
#include <tuple>
|
||||
#include <limits>
|
||||
#include <cmath>
|
||||
#include <type_traits>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#ifndef RW_DC
|
||||
# pragma warning(disable: 4244) // int to float
|
||||
# pragma warning(disable: 4800) // int to bool
|
||||
# pragma warning(disable: 4838) // narrowing conversion
|
||||
# pragma warning(disable: 4996) // POSIX names
|
||||
#else
|
||||
# if !defined(DC_TEXCONV) && !defined(DC_SIM)
|
||||
# include <kos.h>
|
||||
# define DC_SH4
|
||||
# define VIDEO_MODE_WIDTH static_cast<float>(vid_mode->width)
|
||||
# define VIDEO_MODE_HEIGHT static_cast<float>(vid_mode->height)
|
||||
# define memcpy4 memcpy
|
||||
# else
|
||||
# ifdef DC_TEXCONV
|
||||
# define malloc_stats()
|
||||
# endif
|
||||
# include <dc/matrix.h>
|
||||
# include <dc/pvr.h>
|
||||
# include <kos/dbglog.h>
|
||||
# define VIDEO_MODE_WIDTH 640.0f
|
||||
# define VIDEO_MODE_HEIGHT 480.0f
|
||||
# define memcpy4 memcpy
|
||||
# define frsqrt(a) (1.0f/sqrtf(a))
|
||||
# define dcache_pref_block(a) __builtin_prefetch(a)
|
||||
# define F_PI M_PI
|
||||
# ifndef __always_inline
|
||||
# define __always_inline inline
|
||||
# endif
|
||||
# endif
|
||||
# ifdef PVR_TXRFMT_STRIDE
|
||||
# undef PVR_TXRFMT_STRIDE
|
||||
# define PVR_TXRFMT_STRIDE (1 << 25)
|
||||
# endif
|
||||
static_assert(PVR_TXRFMT_STRIDE == (1 << 25),
|
||||
"PVR_TXRFMT_STRIDE is bugged in your KOS version");
|
||||
#endif
|
||||
|
||||
#define F_PI_2 (F_PI * 0.5f)
|
||||
#define __hot __attribute__((hot))
|
||||
#define __cold __attribute__((cold))
|
||||
#define __icache_aligned __attribute__((aligned(32)))
|
||||
|
||||
#define STRINGIFY(x) #x
|
||||
#define STR(x) STRINGIFY(x)
|
||||
#define CONCAT_(x,y) x##y
|
||||
#define CONCAT(x,y) CONCAT_(x,y)
|
||||
#define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0]))
|
||||
|
||||
namespace rw {
|
||||
class Matrix;
|
||||
}
|
||||
|
||||
namespace dc {
|
||||
|
||||
struct quaternion_t {
|
||||
float x, y, z, w;
|
||||
};
|
||||
|
||||
__always_inline __hot constexpr float Sin(float x) { return sinf(x); }
|
||||
__always_inline __hot constexpr float Cos(float x) { return cosf(x); }
|
||||
__always_inline __hot constexpr auto SinCos(float x) { return std::pair { Sin(x), Cos(x) }; }
|
||||
__always_inline __hot constexpr float Tan(float x) { return tanf(x); }
|
||||
__always_inline __hot constexpr float Atan(float x) { return atanf(x); }
|
||||
__always_inline __hot constexpr float Atan2(float y, float x) { return atan2f(y, x); }
|
||||
__always_inline __hot constexpr float Asin(float x) { return asinf(x); }
|
||||
__always_inline __hot constexpr float Acos(float x) { return acosf(x); }
|
||||
__always_inline __hot constexpr float Abs(float x) { return fabsf(x); }
|
||||
__always_inline __hot constexpr float Sqrt(float x) { return sqrtf(x); }
|
||||
__always_inline __hot constexpr float RecipSqrt(float x, float y) { return x / Sqrt(y); }
|
||||
__always_inline __hot constexpr float Pow(float x, float y) { return powf(x, y); }
|
||||
__always_inline __hot constexpr float Floor(float x) { return floorf(x); }
|
||||
__always_inline __hot constexpr float Ceil(float x) { return ceilf(x); }
|
||||
__always_inline __hot constexpr float Fmac(auto a, auto b, auto c) { return a * b + c; }
|
||||
__always_inline __hot constexpr float Lerp(float a, float b, float t) { return Fmac(t, (b - a), a); }
|
||||
__always_inline __hot constexpr auto Max(auto a, auto b) { return ((a > b)? a : b); }
|
||||
__always_inline __hot constexpr auto Min(auto a, auto b) { return ((a < b)? a : b); }
|
||||
|
||||
template<bool CHECK_ZERO=false>
|
||||
__always_inline __hot constexpr float RecipSqrt(float x) {
|
||||
if constexpr(CHECK_ZERO)
|
||||
if(x == 0.0f)
|
||||
return 0.0f;
|
||||
|
||||
return 1.0f / Sqrt(x);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__always_inline __hot constexpr T Clamp(T v, auto low, auto high) {
|
||||
return std::clamp(v, static_cast<T>(low), static_cast<T>(high));
|
||||
}
|
||||
|
||||
__always_inline constexpr auto Clamp2(auto v, auto center, auto radius) {
|
||||
return (v > center) ? Min(v, center + radius) : Max(v, center - radius);
|
||||
}
|
||||
|
||||
template<bool FAST_APPROX=true, bool COPY_SIGN=true>
|
||||
__always_inline __hot constexpr float Invert(float x) {
|
||||
float value;
|
||||
|
||||
if(!std::is_constant_evaluated() && FAST_APPROX) {
|
||||
value = RecipSqrt(x * x);
|
||||
|
||||
if constexpr(COPY_SIGN)
|
||||
if(x < 0.0f)
|
||||
value = -value;
|
||||
|
||||
} else value = 1.0f / x;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
template<bool FAST_APPROX=true, bool COPY_SIGN=true>
|
||||
__always_inline __hot constexpr float Div(float x, float y) {
|
||||
if(FAST_APPROX && !std::is_constant_evaluated())
|
||||
return x * Invert<true, COPY_SIGN>(y);
|
||||
else
|
||||
return x / y;
|
||||
}
|
||||
|
||||
template<bool FAST_APPROX=false, bool COPY_SIGN=true>
|
||||
__always_inline __hot constexpr auto Norm(auto value, auto min, auto max) {
|
||||
auto numerator = Clamp(value, min, max) - min;
|
||||
auto denominator = (max - min);
|
||||
|
||||
if(FAST_APPROX && !std::is_constant_evaluated())
|
||||
return Div<true, COPY_SIGN>(numerator, denominator);
|
||||
else
|
||||
return numerator / denominator;
|
||||
}
|
||||
|
||||
#ifdef RW_DC
|
||||
# ifdef DC_SH4
|
||||
|
||||
#define mat_trans_nodiv_nomod(x, y, z, x2, y2, z2, w2) do { \
|
||||
register float __x __asm__("fr12") = (x); \
|
||||
register float __y __asm__("fr13") = (y); \
|
||||
register float __z __asm__("fr14") = (z); \
|
||||
register float __w __asm__("fr15") = 1.0f; \
|
||||
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
|
||||
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_nodiv_nomod_zerow(x, y, z, x2, y2, z2, w2) do { \
|
||||
register float __x __asm__("fr12") = (x); \
|
||||
register float __y __asm__("fr13") = (y); \
|
||||
register float __z __asm__("fr14") = (z); \
|
||||
register float __w __asm__("fr15") = 0.0f; \
|
||||
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
|
||||
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_w_nodiv_nomod(x, y, z, w) do { \
|
||||
register float __x __asm__("fr12") = (x); \
|
||||
register float __y __asm__("fr13") = (y); \
|
||||
register float __z __asm__("fr14") = (z); \
|
||||
register float __w __asm__("fr15") = 1.0f; \
|
||||
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
|
||||
w = __w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_vec4_nodiv_nomod(x, y, z, w, x2, y2, z2, w2) { \
|
||||
register float __x __asm__("fr0") = (x); \
|
||||
register float __y __asm__("fr1") = (y); \
|
||||
register float __z __asm__("fr2") = (z); \
|
||||
register float __w __asm__("fr3") = (w); \
|
||||
__asm__ __volatile__( "ftrv xmtrx, fv0\n" \
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
|
||||
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
|
||||
} while(false)
|
||||
|
||||
inline __hot __icache_aligned void mat_load2(const matrix_t* mtx) {
|
||||
asm volatile(
|
||||
R"(
|
||||
fschg
|
||||
fmov.d @%[mtx],xd0
|
||||
add #32,%[mtx]
|
||||
pref @%[mtx]
|
||||
add #-(32-8),%[mtx]
|
||||
fmov.d @%[mtx]+,xd2
|
||||
fmov.d @%[mtx]+,xd4
|
||||
fmov.d @%[mtx]+,xd6
|
||||
fmov.d @%[mtx]+,xd8
|
||||
fmov.d @%[mtx]+,xd10
|
||||
fmov.d @%[mtx]+,xd12
|
||||
fmov.d @%[mtx]+,xd14
|
||||
fschg
|
||||
)"
|
||||
: [mtx] "+r" (mtx)
|
||||
:
|
||||
:
|
||||
);
|
||||
}
|
||||
|
||||
inline __hot __icache_aligned void mat_store2(matrix_t *mtx) {
|
||||
asm volatile(
|
||||
R"(
|
||||
fschg
|
||||
add #64-8,%[mtx]
|
||||
fmov.d xd14,@%[mtx]
|
||||
add #-32,%[mtx]
|
||||
pref @%[mtx]
|
||||
add #32,%[mtx]
|
||||
fmov.d xd12,@-%[mtx]
|
||||
fmov.d xd10,@-%[mtx]
|
||||
fmov.d xd8,@-%[mtx]
|
||||
fmov.d xd6,@-%[mtx]
|
||||
fmov.d xd4,@-%[mtx]
|
||||
fmov.d xd2,@-%[mtx]
|
||||
fmov.d xd0,@-%[mtx]
|
||||
fschg
|
||||
)"
|
||||
: [mtx] "+&r" (mtx), "=m" (*mtx)
|
||||
:
|
||||
:
|
||||
);
|
||||
}
|
||||
|
||||
inline __hot __icache_aligned void mat_identity2(void) {
|
||||
asm volatile(
|
||||
R"(
|
||||
frchg
|
||||
fldi1 fr0
|
||||
fschg
|
||||
fldi0 fr1
|
||||
fldi0 fr2
|
||||
fldi0 fr3
|
||||
fldi0 fr4
|
||||
fldi1 fr5
|
||||
fmov dr2,dr6
|
||||
fmov dr2,dr8
|
||||
fmov dr0,dr10
|
||||
fmov dr2,dr12
|
||||
fmov dr4,dr14
|
||||
fschg
|
||||
frchg
|
||||
)"
|
||||
);
|
||||
}
|
||||
|
||||
inline __hot __icache_aligned void mat_set_scale(float x, float y, float z) {
|
||||
asm volatile(
|
||||
R"(
|
||||
frchg
|
||||
fldi0 fr1
|
||||
fschg
|
||||
fldi0 fr2
|
||||
fldi0 fr3
|
||||
fldi0 fr4
|
||||
fmov dr2, dr6
|
||||
fmov dr2, dr8
|
||||
fldi0 fr11
|
||||
fmov dr2, dr12
|
||||
fldi0 fr14
|
||||
fschg
|
||||
frchg
|
||||
fmov %[x], xf0
|
||||
fmov %[y], xf5
|
||||
fmov %[z], xf10
|
||||
)"
|
||||
:
|
||||
: [x] "r" (x), [y] "r" (y), [z] "r" (z)
|
||||
:
|
||||
);
|
||||
}
|
||||
|
||||
// no declspec naked, so can't do rts / fschg. instead compiler pads with nop?
|
||||
inline __hot void mat_load_3x3(const rw::Matrix* mtx) {
|
||||
__asm__ __volatile__ (
|
||||
R"(
|
||||
fschg
|
||||
frchg
|
||||
|
||||
fmov @%[mtx]+, dr0
|
||||
|
||||
fldi0 fr12
|
||||
fldi0 fr13
|
||||
|
||||
fmov @%[mtx]+, dr2
|
||||
fmov @%[mtx]+, dr4
|
||||
fmov @%[mtx]+, dr6
|
||||
fmov @%[mtx]+, dr8
|
||||
fmov @%[mtx]+, dr10
|
||||
|
||||
fldi0 fr3
|
||||
fldi0 fr7
|
||||
fldi0 fr11
|
||||
fmov dr12, dr14
|
||||
|
||||
fschg
|
||||
frchg
|
||||
)"
|
||||
: [mtx] "+r" (mtx)
|
||||
);
|
||||
}
|
||||
|
||||
// sets pos.w to 1
|
||||
inline __hot void mat_load_4x4(const rw::Matrix* mtx) {
|
||||
__asm__ __volatile__ (
|
||||
R"(
|
||||
fschg
|
||||
frchg
|
||||
fmov @%[mtx]+, dr0
|
||||
|
||||
fmov @%[mtx]+, dr2
|
||||
fmov @%[mtx]+, dr4
|
||||
fmov @%[mtx]+, dr6
|
||||
fmov @%[mtx]+, dr8
|
||||
fmov @%[mtx]+, dr10
|
||||
fmov @%[mtx]+, dr12
|
||||
fmov @%[mtx]+, dr14
|
||||
fldi1 fr15
|
||||
|
||||
fschg
|
||||
frchg
|
||||
)"
|
||||
: [mtx] "+r" (mtx)
|
||||
);
|
||||
}
|
||||
|
||||
__hot inline void mat_transpose(void) {
|
||||
asm volatile (
|
||||
"frchg\n\t" // fmov for singles only works on front bank
|
||||
// FR0, FR5, FR10, and FR15 are already in place
|
||||
// swap FR1 and FR4
|
||||
"flds FR1, FPUL\n\t"
|
||||
"fmov FR4, FR1\n\t"
|
||||
"fsts FPUL, FR4\n\t"
|
||||
// swap FR2 and FR8
|
||||
"flds FR2, FPUL\n\t"
|
||||
"fmov FR8, FR2\n\t"
|
||||
"fsts FPUL, FR8\n\t"
|
||||
// swap FR3 and FR12
|
||||
"flds FR3, FPUL\n\t"
|
||||
"fmov FR12, FR3\n\t"
|
||||
"fsts FPUL, FR12\n\t"
|
||||
// swap FR6 and FR9
|
||||
"flds FR6, FPUL\n\t"
|
||||
"fmov FR9, FR6\n\t"
|
||||
"fsts FPUL, FR9\n\t"
|
||||
// swap FR7 and FR13
|
||||
"flds FR7, FPUL\n\t"
|
||||
"fmov FR13, FR7\n\t"
|
||||
"fsts FPUL, FR13\n\t"
|
||||
// swap FR11 and FR14
|
||||
"flds FR11, FPUL\n\t"
|
||||
"fmov FR14, FR11\n\t"
|
||||
"fsts FPUL, FR14\n\t"
|
||||
// restore XMTRX to back bank
|
||||
"frchg\n"
|
||||
: // no outputs
|
||||
: // no inputs
|
||||
: "fpul" // clobbers
|
||||
);
|
||||
}
|
||||
|
||||
__hot __icache_aligned inline void mat_copy(matrix_t *dst, const matrix_t *src) {
|
||||
asm volatile(R"(
|
||||
fschg
|
||||
|
||||
pref @%[dst]
|
||||
|
||||
fmov.d @%[src]+, xd0
|
||||
fmov.d @%[src]+, xd2
|
||||
fmov.d @%[src]+, xd4
|
||||
fmov.d @%[src]+, xd6
|
||||
|
||||
pref @%[src]
|
||||
|
||||
add #32, %[dst]
|
||||
|
||||
fmov.d xd6, @-%[dst]
|
||||
fmov.d xd4, @-%[dst]
|
||||
fmov.d xd2, @-%[dst]
|
||||
fmov.d xd0, @-%[dst]
|
||||
|
||||
add #32, %[dst]
|
||||
pref @%[dst]
|
||||
|
||||
fmov.d @%[src]+, xd0
|
||||
fmov.d @%[src]+, xd2
|
||||
fmov.d @%[src]+, xd4
|
||||
fmov.d @%[src]+, xd6
|
||||
|
||||
add #32, %[dst]
|
||||
|
||||
fmov.d xd6, @-%[dst]
|
||||
fmov.d xd4, @-%[dst]
|
||||
fmov.d xd2, @-%[dst]
|
||||
fmov.d xd0, @-%[dst]
|
||||
|
||||
fschg
|
||||
)": [dst] "+&r" (dst), [src] "+&r" (src), "=m" (*dst)
|
||||
:
|
||||
:);
|
||||
}
|
||||
|
||||
//TODO: FIXME FOR VC (AND USE FTRV)
|
||||
template<bool FAST_APPROX=false>
|
||||
__hot constexpr inline void quat_mult(quaternion_t *r, const quaternion_t &q1, const quaternion_t &q2) {
|
||||
if(FAST_APPROX && !std::is_constant_evaluated()) {
|
||||
/*
|
||||
// reorder the coefficients so that q1 stays in constant order {x,y,z,w}
|
||||
// q2 then needs to be rotated after each inner product
|
||||
x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
|
||||
y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
|
||||
z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
|
||||
w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
|
||||
*/
|
||||
// keep q1 in fv4
|
||||
register float q1x __asm__ ("fr4") = (q1.x);
|
||||
register float q1y __asm__ ("fr5") = (q1.y);
|
||||
register float q1z __asm__ ("fr6") = (q1.z);
|
||||
register float q1w __asm__ ("fr7") = (q1.w);
|
||||
|
||||
// load q2 into fv8, use it to get the shuffled reorder into fv0
|
||||
register float q2x __asm__ ("fr8") = (q2.x);
|
||||
register float q2y __asm__ ("fr9") = (q2.y);
|
||||
register float q2z __asm__ ("fr10") = (q2.z);
|
||||
register float q2w __asm__ ("fr11") = (q2.w);
|
||||
|
||||
// temporary operand / result in fv0
|
||||
register float t1x __asm__ ("fr0");
|
||||
register float t1y __asm__ ("fr1");
|
||||
register float t1z __asm__ ("fr2");
|
||||
register float t1w __asm__ ("fr3");
|
||||
|
||||
// x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
|
||||
t1x = q2w;
|
||||
t1y = q2z;
|
||||
t1z = -q2y;
|
||||
t1w = q2w;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv0\n"
|
||||
: "+f" (t1w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||
);
|
||||
// x = t1w; try to avoid the stall by not reading the fipr result immediately
|
||||
|
||||
// y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
|
||||
t1x = -q2z;
|
||||
t1y = q2w;
|
||||
t1z = q2x;
|
||||
__atomic_thread_fence(1);
|
||||
r->x = t1w; // get previous result
|
||||
t1w = q2y;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv0\n"
|
||||
: "+f" (t1w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||
);
|
||||
//y = t1w;
|
||||
|
||||
// z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
|
||||
t1x = q2y;
|
||||
t1y = -q2x;
|
||||
t1z = q2w;
|
||||
__atomic_thread_fence(1);
|
||||
r->y = t1w; // get previous result
|
||||
t1w = q2z;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv0\n"
|
||||
: "+f" (t1w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||
);
|
||||
//z = t1w;
|
||||
__atomic_thread_fence(1);
|
||||
|
||||
// w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
|
||||
q2x = -q2x;
|
||||
q2y = -q2y;
|
||||
q2z = -q2z;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv8\n"
|
||||
: "+f" (q2w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (q2x), "f" (q2y), "f" (q2z)
|
||||
);
|
||||
|
||||
__atomic_thread_fence(1);
|
||||
r->z = t1w;
|
||||
__atomic_thread_fence(1);
|
||||
r->w = q2w;
|
||||
} else {
|
||||
r->x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
|
||||
r->y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
|
||||
r->z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
|
||||
r->w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
|
||||
}
|
||||
}
|
||||
|
||||
__hot inline void mat_load_apply(const matrix_t* matrix1, const matrix_t* matrix2) {
|
||||
unsigned int prefetch_scratch;
|
||||
|
||||
asm volatile (
|
||||
"mov %[bmtrx], %[pref_scratch]\n\t" // (MT)
|
||||
"add #32, %[pref_scratch]\n\t" // offset by 32 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
|
||||
"fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) (FE)
|
||||
"pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
|
||||
// back matrix
|
||||
"fmov.d @%[bmtrx]+, XD0\n\t" // (LS)
|
||||
"fmov.d @%[bmtrx]+, XD2\n\t"
|
||||
"fmov.d @%[bmtrx]+, XD4\n\t"
|
||||
"fmov.d @%[bmtrx]+, XD6\n\t"
|
||||
"pref @%[fmtrx]\n\t" // prefetch fmtrx now while we wait (LS)
|
||||
"fmov.d @%[bmtrx]+, XD8\n\t" // bmtrx prefetch should work for here
|
||||
"fmov.d @%[bmtrx]+, XD10\n\t"
|
||||
"fmov.d @%[bmtrx]+, XD12\n\t"
|
||||
"mov %[fmtrx], %[pref_scratch]\n\t" // (MT)
|
||||
"add #32, %[pref_scratch]\n\t" // store offset by 32 in r0 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
|
||||
"fmov.d @%[bmtrx], XD14\n\t"
|
||||
"pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
|
||||
// front matrix
|
||||
// interleave loads and matrix multiply 4x4
|
||||
"fmov.d @%[fmtrx]+, DR0\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR2\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR4\n\t" // (LS) want to issue the next one before 'ftrv' for parallel exec
|
||||
"ftrv XMTRX, FV0\n\t" // (FE)
|
||||
|
||||
"fmov.d @%[fmtrx]+, DR6\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR8\n\t"
|
||||
"ftrv XMTRX, FV4\n\t"
|
||||
|
||||
"fmov.d @%[fmtrx]+, DR10\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR12\n\t"
|
||||
"ftrv XMTRX, FV8\n\t"
|
||||
|
||||
"fmov.d @%[fmtrx], DR14\n\t" // (LS, but this will stall 'ftrv' for 3 cycles)
|
||||
"fschg\n\t" // switch back to single moves (and avoid stalling 'ftrv') (FE)
|
||||
"ftrv XMTRX, FV12\n\t" // (FE)
|
||||
// Save output in XF regs
|
||||
"frchg\n"
|
||||
: [bmtrx] "+&r" ((unsigned int)matrix1), [fmtrx] "+r" ((unsigned int)matrix2), [pref_scratch] "=&r" (prefetch_scratch) // outputs, "+" means r/w, "&" means it's written to before all inputs are consumed
|
||||
: // no inputs
|
||||
: "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15" // clobbers (GCC doesn't know about back bank, so writing to it isn't clobbered)
|
||||
);
|
||||
}
|
||||
|
||||
__hot inline void mat_apply_rotate_x(float x) {
|
||||
x *= 10430.37835f;
|
||||
asm volatile(
|
||||
"ftrc %[a], fpul\n\t"
|
||||
"fsca fpul, dr4\n\t"
|
||||
"fldi0 fr8\n\t"
|
||||
"fldi0 fr11\n\t"
|
||||
"fmov fr5, fr10\n\t"
|
||||
"fmov fr4, fr9\n\t"
|
||||
"fneg fr9\n\t"
|
||||
"ftrv xmtrx, fv8\n\t"
|
||||
"fmov fr4, fr6\n\t"
|
||||
"fldi0 fr7\n\t"
|
||||
"fldi0 fr4\n\t"
|
||||
"ftrv xmtrx, fv4\n\t"
|
||||
"fschg\n\t"
|
||||
"fmov dr8, xd8\n\t"
|
||||
"fmov dr10, xd10\n\t"
|
||||
"fmov dr4, xd4\n\t"
|
||||
"fmov dr6, xd6\n\t"
|
||||
"fschg\n"
|
||||
:
|
||||
: [a] "f"(x)
|
||||
: "fpul", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11");
|
||||
}
|
||||
|
||||
__hot inline void mat_apply_rotate_y(float y) {
|
||||
y *= 10430.37835f;
|
||||
asm volatile(
|
||||
"ftrc %[a], fpul\n\t"
|
||||
"fsca fpul, dr6\n\t"
|
||||
"fldi0 fr9\n\t"
|
||||
"fldi0 fr11\n\t"
|
||||
"fmov fr6, fr8\n\t"
|
||||
"fmov fr7, fr10\n\t"
|
||||
"ftrv xmtrx, fv8\n\t"
|
||||
"fmov fr7, fr4\n\t"
|
||||
"fldi0 fr5\n\t"
|
||||
"fneg fr6\n\t"
|
||||
"fldi0 fr7\n\t"
|
||||
"ftrv xmtrx, fv4\n\t"
|
||||
"fschg\n\t"
|
||||
"fmov dr8, xd8\n\t"
|
||||
"fmov dr10, xd10\n\t"
|
||||
"fmov dr4, xd0\n\t"
|
||||
"fmov dr6, xd2\n\t"
|
||||
"fschg\n"
|
||||
:
|
||||
: [a] "f"(y)
|
||||
: "fpul", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11");
|
||||
}
|
||||
|
||||
__hot inline void mat_apply_rotate_z(float z) {
|
||||
z *= 10430.37835f;
|
||||
asm volatile(
|
||||
"ftrc %[a], fpul\n\t"
|
||||
"fsca fpul, dr8\n\t"
|
||||
"fldi0 fr10\n\t"
|
||||
"fldi0 fr11\n\t"
|
||||
"fmov fr8, fr5\n\t"
|
||||
"fneg fr8\n\t"
|
||||
"ftrv xmtrx, fv8\n\t"
|
||||
"fmov fr9, fr4\n\t"
|
||||
"fschg\n\t"
|
||||
"fmov dr10, dr6\n\t"
|
||||
"ftrv xmtrx, fv4\n\t"
|
||||
"fmov dr8, xd4\n\t"
|
||||
"fmov dr10, xd6\n\t"
|
||||
"fmov dr4, xd0\n\t"
|
||||
"fmov dr6, xd2\n\t"
|
||||
"fschg\n"
|
||||
:
|
||||
: [a] "f"(z)
|
||||
: "fpul", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11");
|
||||
}
|
||||
|
||||
# else
|
||||
# ifdef DC_TEXCONV
|
||||
# define mat_apply(a)
|
||||
# define mat_load2(a)
|
||||
# define mat_store2(a)
|
||||
# define mat_identity2()
|
||||
# define mat_transform(a, b, c, d)
|
||||
# define pvr_fog_table_color(a,r,g,b)
|
||||
# define pvr_fog_table_linear(s,e)
|
||||
# else
|
||||
# define mat_load2(a) mat_load(a)
|
||||
# define mat_store2(a) mat_store(a)
|
||||
# define mat_identity2() mat_identity()
|
||||
# endif
|
||||
|
||||
#define mat_trans_single3_nomod(x_, y_, z_, x2, y2, z2) do { \
|
||||
vector_t tmp = { x_, y_, z_, 1.0f }; \
|
||||
mat_transform(&tmp, &tmp, 1, 0); \
|
||||
z2 = 1.0f / tmp.w; \
|
||||
x2 = tmp.x * z2; \
|
||||
y2 = tmp.y * z2; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_single3_nodiv_nomod(x_, y_, z_, x2, y2, z2) do { \
|
||||
vector_t tmp = { x_, y_, z_, 1.0f }; \
|
||||
mat_transform(&tmp, &tmp, 1, 0); \
|
||||
z2 = tmp.z; \
|
||||
x2 = tmp.x; \
|
||||
y2 = tmp.y; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_nodiv_nomod(x_, y_, z_, x2, y2, z2, w2) do { \
|
||||
vector_t tmp1233123 = { x_, y_, z_, 1.0f }; \
|
||||
mat_transform(&tmp1233123, &tmp1233123, 1, 0); \
|
||||
x2 = tmp1233123.x; y2 = tmp1233123.y; z2 = tmp1233123.z; w2 = tmp1233123.w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_nodiv_nomod_zerow(x_, y_, z_, x2, y2, z2, w2) do { \
|
||||
vector_t tmp1233123 = { x_, y_, z_, 0.0f }; \
|
||||
mat_transform(&tmp1233123, &tmp1233123, 1, 0); \
|
||||
x2 = tmp1233123.x; y2 = tmp1233123.y; z2 = tmp1233123.z; w2 = tmp1233123.w; \
|
||||
} while(false)
|
||||
|
||||
#define mat_trans_w_nodiv_nomod(x_, y_, z_, w_) do { \
|
||||
vector_t tmp1233123 = { x_, y_, z_, 1.0f }; \
|
||||
mat_transform(&tmp1233123, &tmp1233123, 1, 0); \
|
||||
w_ = tmp1233123.w; \
|
||||
} while(false)
|
||||
|
||||
inline void mat_load_3x3(const rw::Matrix* mtx) {
|
||||
memcpy(XMTRX, mtx, sizeof(matrix_t));
|
||||
XMTRX[0][3] = 0.0f;
|
||||
XMTRX[1][3] = 0.0f;
|
||||
XMTRX[2][3] = 0.0f;
|
||||
|
||||
XMTRX[3][0] = 0.0f;
|
||||
XMTRX[3][1] = 0.0f;
|
||||
XMTRX[3][2] = 0.0f;
|
||||
XMTRX[3][3] = 0.0f;
|
||||
}
|
||||
|
||||
inline void mat_load_4x4(const rw::Matrix* mtx) {
|
||||
memcpy(XMTRX, mtx, sizeof(matrix_t));
|
||||
XMTRX[3][3] = 1.0f;
|
||||
}
|
||||
|
||||
inline void mat_transpose(void) {
|
||||
matrix_t tmp;
|
||||
|
||||
for(unsigned i = 0; i < 4; ++i)
|
||||
for(unsigned j = 0; j < 4; ++j)
|
||||
tmp[j][i] = XMTRX[i][j];
|
||||
|
||||
mat_load(&tmp);
|
||||
}
|
||||
|
||||
template<bool FAST_APPROX=true>
|
||||
__hot inline void quat_mult(quaternion_t *r, const quaternion_t &q1, const quaternion_t &q2) {
|
||||
r->x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
|
||||
r->y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
|
||||
r->z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
|
||||
r->w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
|
||||
}
|
||||
|
||||
__hot inline void mat_load_apply(const matrix_t* matrix1, const matrix_t* matrix2) {
|
||||
mat_load(matrix1);
|
||||
mat_apply(matrix2);
|
||||
}
|
||||
|
||||
__always_inline __hot void mat_copy(matrix_t *dst, const matrix_t *src) {
|
||||
mat_load(src);
|
||||
mat_store(dst);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
__hot inline void mat_mult(matrix_t *out, const matrix_t* matrix1, const matrix_t* matrix2) {
|
||||
mat_load_apply(matrix1, matrix2);
|
||||
mat_store2(out);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif /* RWDC_COMMON_H */
|
||||
37
vendor/librw/src/rwbase.h
vendored
37
vendor/librw/src/rwbase.h
vendored
@@ -1,6 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include "common_defines.h"
|
||||
#ifdef RW_DC
|
||||
#include "rwdc_common.h"
|
||||
#endif
|
||||
|
||||
#ifndef RW_PS2
|
||||
#include <stdint.h>
|
||||
@@ -253,11 +256,25 @@ inline V3d neg(const V3d &a) { return makeV3d(-a.x, -a.y, -a.z); }
|
||||
inline V3d add(const V3d &a, const V3d &b) { return makeV3d(a.x+b.x, a.y+b.y, a.z+b.z); }
|
||||
inline V3d sub(const V3d &a, const V3d &b) { return makeV3d(a.x-b.x, a.y-b.y, a.z-b.z); }
|
||||
inline V3d scale(const V3d &a, float32 r) { return makeV3d(a.x*r, a.y*r, a.z*r); }
|
||||
inline float32 length(const V3d &v) { return sqrtf(v.x*v.x + v.y*v.y + v.z*v.z); }
|
||||
inline float32 length(const V3d &v) {
|
||||
#ifndef DC_SH4
|
||||
return sqrtf(v.x*v.x + v.y*v.y + v.z*v.z);
|
||||
#else
|
||||
float len;
|
||||
vec3f_length(v.x, v.y, v.z, len);
|
||||
return len;
|
||||
#endif
|
||||
}
|
||||
inline V3d normalize(const V3d &v) { return scale(v, 1.0f/length(v)); }
|
||||
inline V3d setlength(const V3d &v, float32 l) { return scale(v, l/length(v)); }
|
||||
V3d cross(const V3d &a, const V3d &b);
|
||||
inline __attribute__((always_inline)) float32 dot(const V3d &a, const V3d &b) { return a.x*b.x + a.y*b.y + a.z*b.z; }
|
||||
inline __attribute__((always_inline)) float32 dot(const V3d &a, const V3d &b) {
|
||||
#ifdef DC_SH4
|
||||
return fipr(a.x, a.y, a.z, 0.0f, b.x, b.y, b.z, 0.0f);
|
||||
#else
|
||||
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||
#endif
|
||||
}
|
||||
inline V3d lerp(const V3d &a, const V3d &b, float32 r){
|
||||
return makeV3d(a.x + r*(b.x - a.x),
|
||||
a.y + r*(b.y - a.y),
|
||||
@@ -300,9 +317,21 @@ inline Quat makeQuat(float32 w, const V3d &vec) { Quat q = { vec.x, vec.y, vec.z
|
||||
inline Quat add(const Quat &q, const Quat &p) { return makeQuat(q.w+p.w, q.x+p.x, q.y+p.y, q.z+p.z); }
|
||||
inline Quat sub(const Quat &q, const Quat &p) { return makeQuat(q.w-p.w, q.x-p.x, q.y-p.y, q.z-p.z); }
|
||||
inline Quat negate(const Quat &q) { return makeQuat(-q.w, -q.x, -q.y, -q.z); }
|
||||
inline float32 dot(const Quat &q, const Quat &p) { return q.w*p.w + q.x*p.x + q.y*p.y + q.z*p.z; }
|
||||
inline float32 dot(const Quat &q, const Quat &p) {
|
||||
#ifdef DC_SH4
|
||||
return fipr(q.x, q.y, q.z, q.w, p.x, p.y, p.z, p.w);
|
||||
#else
|
||||
return q.w*p.w + q.x*p.x + q.y*p.y + q.z*p.z;
|
||||
#endif
|
||||
}
|
||||
inline Quat scale(const Quat &q, float32 r) { return makeQuat(q.w*r, q.x*r, q.y*r, q.z*r); }
|
||||
inline float32 length(const Quat &q) { return sqrtf(q.w*q.w + q.x*q.x + q.y*q.y + q.z*q.z); }
|
||||
inline float32 length(const Quat &q) {
|
||||
#ifndef DC_SH4
|
||||
return sqrtf(q.w*q.w + q.x*q.x + q.y*q.y + q.z*q.z);
|
||||
#else
|
||||
return dc::Sqrt(fipr_magnitude_sqr(q.x, q.y, q.z, q.w));
|
||||
#endif
|
||||
}
|
||||
inline Quat normalize(const Quat &q) { return scale(q, 1.0f/length(q)); }
|
||||
inline Quat conj(const Quat &q) { return makeQuat(q.w, -q.x, -q.y, -q.z); }
|
||||
Quat mult(const Quat &q, const Quat &p);
|
||||
|
||||
Reference in New Issue
Block a user