Compare commits

...

9 Commits

Author SHA1 Message Date
Falco Girgis
2ade727c46 Saving state. 2025-03-27 15:17:11 -05:00
Falco Girgis
c02781fe81 Merge remote-tracking branch 'origin/main' into falco/sh4_math_collision_physics 2025-03-17 18:52:34 -05:00
Falco Girgis
446d899c31 Removec uneccessary 64-byte align for RwMatrix. 2025-03-15 13:20:58 -05:00
Falco Girgis
e0b6ac6ab7 Fixed rw::Matrix inversion routines for 4x4s. 2025-03-15 00:27:15 -05:00
Falco Girgis
6ce11f0006 SKINNING IS FIXED!!! 2025-03-14 23:33:34 -05:00
Falco Girgis
011912c4e3 Everything working except skinning! 2025-03-14 11:56:28 -05:00
Falco Girgis
03106c18c2 Added rwdc_common.h! 2025-03-11 18:36:46 -05:00
Falco Girgis
fea70f5275 FIXED MATFX ENV/SPECULAR MAPS! 2025-03-03 07:39:49 -06:00
Falco Girgis
801756d444 Everything working except matfx env mapping. 2025-03-03 05:17:42 -06:00
36 changed files with 1663 additions and 1192 deletions

151
.vscode/settings.json vendored
View File

@@ -1,151 +0,0 @@
{
"C_Cpp.default.cStandard": "gnu11",
"C_Cpp.default.cppStandard": "gnu++14",
"C_Cpp.default.includePath": [
"src",
"src/animation",
"src/audio",
"src/audio/eax",
"src/audio/oal",
"src/buildings",
"src/collision",
"src/control",
"src/core",
"src/entities",
"src/extras",
"src/fakerw",
"src/math",
"src/modelinfo",
"src/objects",
"src/peds",
"src/renderer",
"src/rw",
"src/save/",
"src/skel/",
"src/skel/glfw",
"src/text",
"src/vehicles",
"src/weapons",
"vendor/librw"
],
"C_Cpp.vcFormat.indent.gotoLabels": "leftmostColumn",
"C_Cpp.vcFormat.space.pointerReferenceAlignment": "right",
"cSpell.enabled": false,
"files.trimFinalNewlines": false,
"files.trimTrailingWhitespace": false,
"files.associations": {
"system_error": "cpp",
"xlocale": "cpp",
"xiosbase": "cpp",
"memory": "cpp",
"mutex": "cpp",
"xmemory": "cpp",
"deque": "cpp",
"initializer_list": "cpp",
"list": "cpp",
"queue": "cpp",
"type_traits": "cpp",
"vector": "cpp",
"xhash": "cpp",
"xstring": "cpp",
"xutility": "cpp",
"iosfwd": "cpp",
"algorithm": "cpp",
"atomic": "cpp",
"iterator": "cpp",
"xtree": "cpp",
"ios": "cpp",
"*.inc": "cpp",
"cstdlib": "cpp",
"ratio": "cpp",
"array": "cpp",
"functional": "cpp",
"tuple": "cpp",
"utility": "cpp",
"*.tcc": "cpp",
"unordered_map": "cpp",
"chrono": "cpp",
"bit": "cpp",
"cctype": "cpp",
"cinttypes": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"compare": "cpp",
"concepts": "cpp",
"condition_variable": "cpp",
"csignal": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"map": "cpp",
"set": "cpp",
"string": "cpp",
"exception": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"random": "cpp",
"string_view": "cpp",
"fstream": "cpp",
"iostream": "cpp",
"istream": "cpp",
"limits": "cpp",
"new": "cpp",
"numbers": "cpp",
"ostream": "cpp",
"semaphore": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"stop_token": "cpp",
"streambuf": "cpp",
"thread": "cpp",
"typeinfo": "cpp",
"codecvt": "cpp",
"iomanip": "cpp",
"format": "cpp",
"__locale": "cpp",
"optional": "cpp",
"span": "cpp",
"__bit_reference": "cpp",
"__config": "cpp",
"__hash_table": "cpp",
"__node_handle": "cpp",
"__split_buffer": "cpp",
"__threading_support": "cpp",
"__tree": "cpp",
"__verbose_abort": "cpp",
"bitset": "cpp",
"charconv": "cpp",
"complex": "cpp",
"execution": "cpp",
"locale": "cpp",
"variant": "cpp",
"text_encoding": "cpp",
"stack": "cpp",
"unordered_set": "cpp",
"shared_mutex": "cpp",
"forward_list": "cpp",
"cholesky": "cpp",
"core": "cpp",
"eigenvalues": "cpp",
"geometry": "cpp",
"householder": "cpp",
"jacobi": "cpp",
"lu": "cpp",
"qr": "cpp",
"svd": "cpp",
"print": "cpp",
"strstream": "cpp",
"regex": "cpp",
"cassert": "cpp",
"barrier": "cpp",
"coroutine": "cpp",
"future": "cpp",
"latch": "cpp",
"syncstream": "cpp"
}
}

View File

@@ -96,20 +96,24 @@ OBJS_TEXCONV += \
../vendor/librw/src/d3d-x/d3d8render.texconv.o \
../vendor/librw/src/bmp.texconv.o \
../vendor/librw/src/png.texconv.o \
../vendor/librw/src/lodepng/lodepng.texconv.o
../vendor/librw/src/lodepng/lodepng.texconv.o
# Add compilation units to this list to explicity compile them with
# -O3 optimizations, while the rest get the default (-Os) treatment
# to conserve RAM.
OBJS_O3 = \
../vendor/librw/src/dc/rwdc.o \
../src/liberty/core/World.o \
../src/liberty/collision/Collision.o \
../src/liberty/math/math.o \
../src/liberty/math/Matrix.o \
../src/liberty/math/Quaternion.o \
../src/liberty/math/Rect.o \
../src/liberty/math/Vector.o \
../src/core/World.o \
../src/core/ZoneCull.o \
../src/entities/Physical.o \
../src/collision/Collision.o \
../src/peds/PedAI.o \
../src/render/Renderer.o \
../src/math/math.o \
../src/math/Matrix.o \
../src/math/Quaternion.o \
../src/math/Rect.o \
../src/math/Vector.o \
../vendor/librw/src/base.o \
../src/liberty/renderer/Shadows.o
@@ -138,7 +142,7 @@ CXXFLAGS += $(if $(WITH_32MB),-O3,-Os) \
-Wno-multichar -Wno-unused-value -Wno-char-subscripts -Wno-reorder \
-Wno-unused-function -Wno-class-memaccess -fno-permissive \
-fno-asynchronous-unwind-tables -fno-enforce-eh-specs -fno-non-call-exceptions \
-fno-strict-aliasing -fwrapv
-fno-strict-aliasing -fwrapv -fno-PIC
clean-texconv:
-rm -f $(OBJS_TEXCONV)

View File

@@ -381,6 +381,7 @@ INCLUDE = \
-I../src/liberty/skel/win \
\
-I../vendor/librw \
-I../vendor/librw/src/dc \
\
-I../vendor/miniLZO \
\

View File

@@ -1,14 +1,14 @@
#pragma once
struct CColLine
struct alignas(8) CColLine
{
// NB: this has to be compatible with two CVuVectors
CVector p0;
int pad0;
CVector p1;
int pad1;
alignas(8) CVector p0;
float pad0 = 0.0f;
alignas(8) CVector p1;
float pad1 = 0.0f;
CColLine(void) { };
CColLine(void) = default;
CColLine(const CVector &p0, const CVector &p1) { this->p0 = p0; this->p1 = p1; };
void Set(const CVector &p0, const CVector &p1);
};

View File

@@ -1,12 +1,12 @@
#pragma once
struct CColPoint
struct alignas(8) CColPoint
{
CVector point;
int pad1;
alignas(8) CVector point;
float pad1 = 0.0f;
// the surface normal on the surface of point
CVector normal;
int pad2;
alignas(8) CVector normal;
float pad2 = 0.0f;
uint8 surfaceA;
uint8 pieceA;
uint8 surfaceB;

View File

@@ -4,8 +4,6 @@
void
CColSphere::Set(float radius, const CVector &center, uint8 surf, uint8 piece)
{
this->radius = radius;
Set(radius, surf, piece);
this->center = center;
this->surface = surf;
this->piece = piece;
}

View File

@@ -9,5 +9,10 @@ struct CColSphere
float radius;
uint8 surface;
uint8 piece;
void Set(float radius, uint8 surf = SURFACE_DEFAULT, uint8 piece = 0) {
this->radius = radius;
this->surface = surf;
this->piece = piece;
}
void Set(float radius, const CVector &center, uint8 surf = SURFACE_DEFAULT, uint8 piece = 0);
};

View File

@@ -23,6 +23,10 @@
#include "Collision.h"
#include "Frontend.h"
#ifdef DC_SH4
#include "VuCollision.h"
#endif
#ifdef VU_COLLISION
#include "VuCollision.h"
@@ -579,7 +583,7 @@ CCollision::TestLineSphere(const CColLine &line, const CColSphere &sph)
// Sqrt(diffsq) somehow works out to be the distance from that
// midpoint to the intersection points.
// So subtract that and get rid of the awkward scaling:
float f = (-projline - Sqrt(diffsq)) / (2.0f*linesq);
float f = Div((-projline - Sqrt(diffsq)), (2.0f*linesq));
// f should now be in range [0, 1] for [p0, p1]
return f >= 0.0f && f <= 1.0f;
}
@@ -614,7 +618,7 @@ CCollision::TestSphereTriangle(const CColSphere &sphere,
// calculate two orthogonal basis vectors for the triangle
CVector vec2 = vb - va;
float len = vec2.Magnitude();
vec2 = vec2 * (1.0f/len);
vec2 = vec2 * Invert<true, false>(len);
CVector normal;
plane.GetNormal(normal);
CVector vec1 = CrossProduct(vec2, normal);
@@ -732,12 +736,17 @@ CCollision::TestLineOfSight(const CColLine &line, const CMatrix &matrix, CColMod
return false;
#else
static CMatrix matTransform;
CMatrix matTransform;
int i;
// transform line to model space
Invert(matrix, matTransform);
CColLine newline(matTransform * line.p0, matTransform * line.p1);
CColLine newline;
#ifndef DC_SH4
newline.Set(matTransform * line.p0, matTransform * line.p1);
#else
TransformPoints(reinterpret_cast<CVuVector*>(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2);
#endif
// If we don't intersect with the bounding box, no chance on the rest
if(!TestLineBox(newline, model.boundingBox))
@@ -778,12 +787,13 @@ bool
CCollision::ProcessSphereSphere(const CColSphere &s1, const CColSphere &s2, CColPoint &point, float &mindistsq)
{
CVector dist = s1.center - s2.center;
float d = dist.Magnitude() - s2.radius; // distance from s1's center to s2
float mag = dist.Magnitude();
float d = mag - s2.radius; // distance from s1's center to s2
float depth = s1.radius - d; // sphere overlap
if(d < 0.0f) d = 0.0f; // clamp to zero, i.e. if s1's center is inside s2
// no collision if sphere is not close enough
if(d*d < mindistsq && d < s1.radius){
dist.Normalise();
dist *= Invert<true, false>(mag);
point.point = s1.center - dist*d;
point.normal = dist;
#ifndef VU_COLLISION
@@ -834,7 +844,7 @@ CCollision::ProcessSphereBox(const CColSphere &sph, const CColBox &box, CColPoin
dist = sph.center - p;
float lensq = dist.MagnitudeSqr();
if(lensq < mindistsq){
point.normal = dist * (1.0f/Sqrt(lensq));
point.normal = dist * (RecipSqrt(lensq));
point.point = sph.center - point.normal;
#ifndef VU_COLLISION
point.surfaceA = sph.surface;
@@ -879,8 +889,9 @@ CCollision::ProcessSphereBox(const CColSphere &sph, const CColBox &box, CColPoin
float lensq = dist.MagnitudeSqr();
if(lensq < mindistsq){
float len = Sqrt(lensq);
float invlen = Invert<true, false>(len);
point.point = p;
point.normal = dist * (1.0f/len);
point.normal = dist * invlen;
#ifndef VU_COLLISION
point.surfaceA = sph.surface;
point.pieceA = sph.piece;
@@ -906,7 +917,7 @@ CCollision::ProcessLineBox(const CColLine &line, const CColBox &box, CColPoint &
// check if points are on opposite sides of min x plane
if((box.min.x - line.p1.x) * (box.min.x - line.p0.x) < 0.0f){
// parameter along line where we intersect
t = (box.min.x - line.p0.x) / (line.p1.x - line.p0.x);
t = Div((box.min.x - line.p0.x), (line.p1.x - line.p0.x));
// y of intersection
y = line.p0.y + (line.p1.y - line.p0.y)*t;
if(y > box.min.y && y < box.max.y){
@@ -923,7 +934,7 @@ CCollision::ProcessLineBox(const CColLine &line, const CColBox &box, CColPoint &
// max x plane
if((line.p1.x - box.max.x) * (line.p0.x - box.max.x) < 0.0f){
t = (line.p0.x - box.max.x) / (line.p0.x - line.p1.x);
t = Div((line.p0.x - box.max.x), (line.p0.x - line.p1.x));
y = line.p0.y + (line.p1.y - line.p0.y)*t;
if(y > box.min.y && y < box.max.y){
z = line.p0.z + (line.p1.z - line.p0.z)*t;
@@ -938,7 +949,7 @@ CCollision::ProcessLineBox(const CColLine &line, const CColBox &box, CColPoint &
// min y plne
if((box.min.y - line.p0.y) * (box.min.y - line.p1.y) < 0.0f){
t = (box.min.y - line.p0.y) / (line.p1.y - line.p0.y);
t = Div((box.min.y - line.p0.y), (line.p1.y - line.p0.y));
x = line.p0.x + (line.p1.x - line.p0.x)*t;
if(x > box.min.x && x < box.max.x){
z = line.p0.z + (line.p1.z - line.p0.z)*t;
@@ -953,7 +964,7 @@ CCollision::ProcessLineBox(const CColLine &line, const CColBox &box, CColPoint &
// max y plane
if((line.p0.y - box.max.y) * (line.p1.y - box.max.y) < 0.0f){
t = (line.p0.y - box.max.y) / (line.p0.y - line.p1.y);
t = Div((line.p0.y - box.max.y), (line.p0.y - line.p1.y));
x = line.p0.x + (line.p1.x - line.p0.x)*t;
if(x > box.min.x && x < box.max.x){
z = line.p0.z + (line.p1.z - line.p0.z)*t;
@@ -968,7 +979,7 @@ CCollision::ProcessLineBox(const CColLine &line, const CColBox &box, CColPoint &
// min z plne
if((box.min.z - line.p0.z) * (box.min.z - line.p1.z) < 0.0f){
t = (box.min.z - line.p0.z) / (line.p1.z - line.p0.z);
t = Div((box.min.z - line.p0.z), (line.p1.z - line.p0.z));
x = line.p0.x + (line.p1.x - line.p0.x)*t;
if(x > box.min.x && x < box.max.x){
y = line.p0.y + (line.p1.y - line.p0.y)*t;
@@ -983,7 +994,7 @@ CCollision::ProcessLineBox(const CColLine &line, const CColBox &box, CColPoint &
// max z plane
if((line.p0.z - box.max.z) * (line.p1.z - box.max.z) < 0.0f){
t = (line.p0.z - box.max.z) / (line.p0.z - line.p1.z);
t = Div((line.p0.z - box.max.z), (line.p0.z - line.p1.z));
x = line.p0.x + (line.p1.x - line.p0.x)*t;
if(x > box.min.x && x < box.max.x){
y = line.p0.y + (line.p1.y - line.p0.y)*t;
@@ -1029,7 +1040,7 @@ CCollision::ProcessLineSphere(const CColLine &line, const CColSphere &sphere, CC
if(diffsq < 0.0f)
return false;
// point of first intersection, in range [0,1] between p0 and p1
float t = (projline - Sqrt(diffsq)) / linesq;
float t = Div<true, false>((projline - Sqrt(diffsq)), linesq);
// if not on line or beyond mindist, no intersection
if(t < 0.0f || t > 1.0f || t >= mindist)
return false;
@@ -1083,7 +1094,7 @@ CCollision::ProcessVerticalLineTriangle(const CColLine &line,
// intersection parameter on line
float h = (line.p1 - p0).z;
t = -plane.CalcPoint(p0) / (h * normal.z);
t = Div(-plane.CalcPoint(p0), (h * normal.z));
// early out if we're beyond the mindist
if(t >= mindist)
return false;
@@ -1206,7 +1217,7 @@ CCollision::IsStoredPolyStillValidVerticalLine(const CVector &pos, float z, CCol
// intersection parameter on line
CVector normal;
plane.GetNormal(normal);
t = -plane.CalcPoint(p0) / DotProduct(p1 - p0, normal);
t = Div(-plane.CalcPoint(p0), DotProduct(p1 - p0, normal));
// find point of intersection
CVector p = p0 + (p1-p0)*t;
@@ -1303,7 +1314,7 @@ CCollision::ProcessLineTriangle(const CColLine &line,
#endif
// intersection parameter on line
t = -plane.CalcPoint(line.p0) / p0dist;
t = Div(-plane.CalcPoint(line.p0), p0dist);
// early out if we're beyond the mindist
if(t >= mindist)
@@ -1415,7 +1426,7 @@ CCollision::ProcessSphereTriangle(const CColSphere &sphere,
plane.GetNormal(normal);
CVector vec2 = vb - va;
float len = vec2.Magnitude();
vec2 = vec2 * (1.0f/len);
vec2 = vec2 * Invert<true, false>(len);
CVector vec1 = CrossProduct(vec2, normal);
// We know A has local coordinate [0,0] and B has [0,len].
@@ -1572,12 +1583,17 @@ CCollision::ProcessLineOfSight(const CColLine &line,
}
return false;
#else
static CMatrix matTransform;
CMatrix matTransform;
int i;
// transform line to model space
Invert(matrix, matTransform);
CColLine newline(matTransform * line.p0, matTransform * line.p1);
CColLine newline;
#ifdef DC_SH4
TransformPoints(reinterpret_cast<CVuVector*>(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2);
#else
newline.Set(matTransform * line.p0, matTransform * line.p1);
#endif
// If we don't intersect with the bounding box, no chance on the rest
if(!TestLineBox(newline, model.boundingBox))
@@ -1601,8 +1617,20 @@ CCollision::ProcessLineOfSight(const CColLine &line,
}
if(coldist < mindist){
#ifndef DC_SH4
point.point = matrix * point.point;
point.normal = Multiply3x3(matrix, point.normal);
#else
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrix)));
mat_trans_single3_nodiv(point.point.x,
point.point.y,
point.point.z);
float w = 0;
mat_trans_nodiv(point.normal.x,
point.normal.y,
point.normal.z,
w);
#endif
mindist = coldist;
return true;
}
@@ -1739,9 +1767,14 @@ CCollision::ProcessVerticalLine(const CColLine &line,
// transform line to model space
// Why does the game seem to do this differently than above?
CColLine newline(MultiplyInverse(matrix, line.p0), MultiplyInverse(matrix, line.p1));
newline.p1.x = newline.p0.x;
newline.p1.y = newline.p0.y;
CMatrix matTransform;
Invert(matrix, matTransform);
CColLine newline;
#ifndef DC_SH4
newline.Set(matTransform * line.p0, matTransform * line.p1);
#else
TransformPoints(reinterpret_cast<CVuVector*>(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2);
#endif
if(!TestVerticalLineBox(newline, model.boundingBox))
return false;
@@ -1765,13 +1798,31 @@ CCollision::ProcessVerticalLine(const CColLine &line,
}
if(coldist < mindist){
#ifndef DC_SH4
point.point = matrix * point.point;
point.normal = Multiply3x3(matrix, point.normal);
#else
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrix)));
mat_trans_single3_nodiv(point.point.x,
point.point.y,
point.point.z);
float w = 0;
mat_trans_nodiv(point.normal.x,
point.normal.y,
point.normal.z,
w);
#endif
if(TempStoredPoly.valid && poly){
*poly = TempStoredPoly;
#ifndef DC_SH4
poly->verts[0] = matrix * poly->verts[0];
poly->verts[1] = matrix * poly->verts[1];
poly->verts[2] = matrix * poly->verts[2];
#else
mat_trans_single3_nodiv(poly->verts[0].x, poly->verts[0].y, poly->verts[0].z);
mat_trans_single3_nodiv(poly->verts[1].x, poly->verts[1].y, poly->verts[1].z);
mat_trans_single3_nodiv(poly->verts[2].x, poly->verts[2].y, poly->verts[2].z);
#endif
}
mindist = coldist;
return true;
@@ -2107,15 +2158,27 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
return numCollisions; // sphere collisions
#else
static int aSphereIndicesA[MAXNUMSPHERES];
static int aLineIndicesA[MAXNUMLINES];
static int aSphereIndicesB[MAXNUMSPHERES];
static int aBoxIndicesB[MAXNUMBOXES];
static int aTriangleIndicesB[MAXNUMTRIS];
static bool aCollided[MAXNUMLINES];
static CColSphere aSpheresA[MAXNUMSPHERES];
static CColLine aLinesA[MAXNUMLINES];
static CMatrix matAB, matBA;
#if 0
static auto *aSpheresA = reinterpret_cast<CColSphere*>((uintptr_t)OCRAM);
static auto *aLinesA = reinterpret_cast<CColLine *>((uintptr_t)aSpheresA + sizeof(CColSphere) * MAXNUMSPHERES);
static auto *aSphereIndicesA = reinterpret_cast<uint16_t *>((uintptr_t)aLinesA + sizeof(CColLine) * MAXNUMLINES);
static auto *aLineIndicesA = reinterpret_cast<uint16_t *>((uintptr_t)aSphereIndicesA + sizeof(uint16_t) * MAXNUMSPHERES);
static auto *aSphereIndicesB = reinterpret_cast<uint16_t *>((uintptr_t)aLineIndicesA + sizeof(uint16_t) * MAXNUMLINES);
static auto *aBoxIndicesB = reinterpret_cast<uint16_t *>((uintptr_t)aSphereIndicesB + sizeof(uint16_t) * MAXNUMSPHERES);
static auto *aTriangleIndicesB = reinterpret_cast<uint16_t *>((uintptr_t)aBoxIndicesB + sizeof(uint16_t) * MAXNUMBOXES);
static auto *aCollided = reinterpret_cast<bool *>((uintptr_t)aTriangleIndicesB + sizeof(uint16_t) * MAXNUMTRIS);
CMatrix matAB, matBA;
#else
static CColSphere aSpheresA[MAXNUMSPHERES];
static CColLine aLinesA[MAXNUMLINES];
static int aSphereIndicesA[MAXNUMSPHERES];
static int aLineIndicesA[MAXNUMLINES];
static int aSphereIndicesB[MAXNUMSPHERES];
static int aBoxIndicesB[MAXNUMBOXES];
static int aTriangleIndicesB[MAXNUMTRIS];
static bool aCollided[MAXNUMLINES];
static CMatrix matAB, matBA;
#endif
CColSphere s;
int i, j;
@@ -2128,20 +2191,54 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
CColSphere bsphereAB; // bounding sphere of A in B space
bsphereAB.radius = modelA.boundingSphere.radius;
#ifndef DC_SH4
bsphereAB.center = matAB * modelA.boundingSphere.center;
#else
/* No need to reload the matrix, since it's already banked.
mat_load(reinterpret_cast<const matrix_t *>(&matAB)); */
mat_trans_single3_nodiv_nomod(modelA.boundingSphere.center.x,
modelA.boundingSphere.center.y,
modelA.boundingSphere.center.z,
bsphereAB.center.x,
bsphereAB.center.y,
bsphereAB.center.z);
#endif
if(!TestSphereBox(bsphereAB, modelB.boundingBox))
return 0;
// B to A space
matBA = Invert(matrixA, matBA);
matBA *= matrixB;
// transform modelA's spheres and lines to B space
for(i = 0; i < modelA.numSpheres; i++){
CColSphere &s = modelA.spheres[i];
#ifndef DC_SH4
aSpheresA[i].Set(s.radius, matAB * s.center, s.surface, s.piece);
#else
auto &d = aSpheresA[i];
mat_trans_single3_nodiv_nomod(s.center.x, s.center.y, s.center.z,
d.center.x, d.center.y, d.center.z);
d.Set(s.radius, s.surface, s.piece);
#endif
}
for(i = 0; i < modelA.numLines; i++)
for(i = 0; i < modelA.numLines; i++) {
#ifndef DC_SH4
aLinesA[i].Set(matAB * modelA.lines[i].p0, matAB * modelA.lines[i].p1);
#else
mat_trans_single3_nodiv_nomod(modelA.lines[i].p0.x,
modelA.lines[i].p0.y,
modelA.lines[i].p0.z,
aLinesA[i].p0.x,
aLinesA[i].p0.y,
aLinesA[i].p0.z);
mat_trans_single3_nodiv_nomod(modelA.lines[i].p1.x,
modelA.lines[i].p1.y,
modelA.lines[i].p1.z,
aLinesA[i].p1.x,
aLinesA[i].p1.y,
aLinesA[i].p1.z);
#endif
}
// Test them against model B's bounding volumes
int numSpheresA = 0;
@@ -2160,9 +2257,25 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
int numSpheresB = 0;
int numBoxesB = 0;
int numTrianglesB = 0;
// B to A space
matBA = Invert(matrixA, matBA);
matBA *= matrixB;
#ifdef DC_SH4
/* No need to reload the matrix, since it's already banked.
mat_load(reinterpret_cast<const matrix_t *>(&matBA)); */
#endif
for(i = 0; i < modelB.numSpheres; i++){
s.radius = modelB.spheres[i].radius;
#ifndef DC_SH4
s.center = matBA * modelB.spheres[i].center;
#else
mat_trans_single3_nodiv_nomod(modelB.spheres[i].center.x,
modelB.spheres[i].center.y,
modelB.spheres[i].center.z,
s.center.x,
s.center.y,
s.center.z);
#endif
if(TestSphereBox(s, modelA.boundingBox))
aSphereIndicesB[numSpheresB++] = i;
}
@@ -2209,9 +2322,23 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
if(hasCollided)
numCollisions++;
}
#ifdef DC_SH4
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrixB)));
#endif
for(i = 0; i < numCollisions; i++){
#ifndef DC_SH4
spherepoints[i].point = matrixB * spherepoints[i].point;
spherepoints[i].normal = Multiply3x3(matrixB, spherepoints[i].normal);
#else
mat_trans_single3_nodiv(spherepoints[i].point.x,
spherepoints[i].point.y,
spherepoints[i].point.z);
mat_trans_vec3(spherepoints[i].normal.x,
spherepoints[i].normal.y,
spherepoints[i].normal.z);
#endif
}
// And the same thing for the lines in A
@@ -2242,8 +2369,17 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
for(i = 0; i < numLinesA; i++)
if(aCollided[i]){
j = aLineIndicesA[i];
#ifndef DC_SH4
linepoints[j].point = matrixB * linepoints[j].point;
linepoints[j].normal = Multiply3x3(matrixB, linepoints[j].normal);
#else
mat_trans_single3_nodiv(linepoints[j].point.x,
linepoints[j].point.y,
linepoints[j].point.z);
mat_trans_vec3(linepoints[j].normal.x,
linepoints[j].normal.y,
linepoints[j].normal.z);
#endif
}
return numCollisions; // sphere collisions
@@ -2265,7 +2401,7 @@ CCollision::DistToLine(const CVector *l0, const CVector *l1, const CVector *poin
if(dot <= 0.0f) return (*point - *l0).Magnitude();
if(dot >= lensq) return (*point - *l1).Magnitude();
// distance to line
float distSqr = (*point - *l0).MagnitudeSqr() - dot * dot / lensq;
float distSqr = (*point - *l0).MagnitudeSqr() - dot * Div<true, false>(dot, lensq);
if(distSqr <= 0.f) return 0.f;
return Sqrt(distSqr);
}
@@ -2282,7 +2418,7 @@ CCollision::DistToLine(const CVector *l0, const CVector *l1, const CVector *poin
else if(dot >= lensq)
closest = *l1;
else
closest = *l0 + (*l1 - *l0)*(dot/lensq);
closest = *l0 + (*l1 - *l0)*(Div<true, false>(dot, lensq));
// this is the distance
return (*point - closest).Magnitude();
}

View File

@@ -805,6 +805,7 @@ CCarCtrl::CountCarsOfType(int32 mi)
void
CCarCtrl::UpdateCarOnRails(CVehicle* pVehicle)
{
// Bullshit inner lambda to prevent ICE due to interaction between register allocator and fast-math.
if (pVehicle->AutoPilot.m_nTempAction == TEMPACT_WAIT){
pVehicle->SetMoveSpeed(0.0f, 0.0f, 0.0f);
pVehicle->AutoPilot.ModifySpeed(0.0f);
@@ -833,24 +834,27 @@ CCarCtrl::UpdateCarOnRails(CVehicle* pVehicle)
CVector positionOnNextLinkIncludingLane(
pNextLink->GetX() + ((pVehicle->AutoPilot.m_nNextLane + pNextLink->OneWayLaneOffset()) * LANE_WIDTH) * nextPathLinkForwardY,
pNextLink->GetY() - ((pVehicle->AutoPilot.m_nNextLane + pNextLink->OneWayLaneOffset()) * LANE_WIDTH) * nextPathLinkForwardX,
0.0f);
0.0f);
CVector directionCurrentLink(currentPathLinkForwardX, currentPathLinkForwardY, 0.0f);
CVector directionNextLink(nextPathLinkForwardX, nextPathLinkForwardY, 0.0f);
CVector positionIncludingCurve;
CVector directionIncludingCurve;
CCurves::CalcCurvePoint(
&positionOnCurrentLinkIncludingLane,
&positionOnNextLinkIncludingLane,
&directionCurrentLink,
&directionNextLink,
GetPositionAlongCurrentCurve(pVehicle),
pVehicle->AutoPilot.m_nTimeToSpendOnCurrentCurve,
&positionIncludingCurve,
&directionIncludingCurve
);
positionIncludingCurve.z = 15.0f;
DragCarToPoint(pVehicle, &positionIncludingCurve);
pVehicle->SetMoveSpeed(directionIncludingCurve / GAME_SPEED_TO_CARAI_SPEED);
[&](void) __attribute__((noinline)) { // Prevents an ICE on fast-math by breaking function into mutiple subfunctions for register allocator.
CCurves::CalcCurvePoint(
&positionOnCurrentLinkIncludingLane,
&positionOnNextLinkIncludingLane,
&directionCurrentLink,
&directionNextLink,
GetPositionAlongCurrentCurve(pVehicle),
pVehicle->AutoPilot.m_nTimeToSpendOnCurrentCurve,
&positionIncludingCurve,
&directionIncludingCurve
);
positionIncludingCurve.z = 15.0f;
DragCarToPoint(pVehicle, &positionIncludingCurve);
pVehicle->SetMoveSpeed(directionIncludingCurve * dc::Invert<false>(GAME_SPEED_TO_CARAI_SPEED));
}();
}
float
@@ -2036,10 +2040,10 @@ void CCarCtrl::DragCarToPoint(CVehicle* pVehicle, CVector* pPoint)
float angleZ = Atan2((actualAheadZ - actualBehindZ) / 3, 1.0f);
float cosZ = Cos(angleZ);
float sinZ = Sin(angleZ);
pVehicle->GetRight() = CVector(posTarget.y - midPos.y, -(posTarget.x - midPos.x), 0.0f) / 3;
pVehicle->GetRight() = CVector(posTarget.y - midPos.y, -(posTarget.x - midPos.x), 0.0f) * (1.0f / 3.0f);
pVehicle->GetForward() = CVector(-cosZ * pVehicle->GetRight().y, cosZ * pVehicle->GetRight().x, sinZ);
pVehicle->GetUp() = CrossProduct(pVehicle->GetRight(), pVehicle->GetForward());
pVehicle->SetPosition((CVector(midPos.x, midPos.y, actualBehindZ) + CVector(posTarget.x, posTarget.y, actualAheadZ)) / 2);
pVehicle->SetPosition((CVector(midPos.x, midPos.y, actualBehindZ) + CVector(posTarget.x, posTarget.y, actualAheadZ)) * 0.5f);
pVehicle->GetMatrix().GetPosition().z += pVehicle->GetHeightAboveRoad();
}

View File

@@ -526,7 +526,7 @@ void
CPathFind::PreparePathDataForType(uint8 type, CTempNode *tempnodes, CPathInfoForObject *objectpathinfo,
float maxdist, CTempDetachedNode *detachednodes, int32 numDetached)
{
static CVector CoorsXFormed;
CVector CoorsXFormed;
int i, j, k, l;
int l1, l2;
int start;

View File

@@ -4009,7 +4009,7 @@ void
CCam::Process_ModelView(const CVector &CameraTarget, float, float, float)
{
CVector TargetCoors = CameraTarget;
float Angle = Atan2(Front.x, Front.y);
float Angle = Atan2<false>(Front.x, Front.y);
FOV = DefaultFOV;
Angle += CPad::GetPad(0)->GetLeftStickX()/1280.0f;

View File

@@ -1268,6 +1268,8 @@ CWorld::FindObjectsIntersectingAngledCollisionBoxSectorList(CPtrList &list, cons
int16 *nEntitiesFound, int16 maxEntitiesToFind,
CEntity **aEntities)
{
mat_load(reinterpret_cast<const matrix_t*>(&matrix));
mat_transpose();
for(CPtrNode *pNode = list.first; pNode; pNode = pNode->next) {
CEntity *pEntity = (CEntity *)pNode->item;
if(pEntity->m_scanCode != GetCurrentScanCode()) {
@@ -1275,7 +1277,8 @@ CWorld::FindObjectsIntersectingAngledCollisionBoxSectorList(CPtrList &list, cons
CColSphere sphere;
CVector vecDistance = pEntity->GetPosition() - position;
sphere.radius = pEntity->GetBoundRadius();
sphere.center = Multiply3x3(vecDistance, matrix);
mat_trans_normal3_nomod(vecDistance.x, vecDistance.y, vecDistance.z,
sphere.center.x, sphere.center.y, sphere.center.z);
if(CCollision::TestSphereBox(sphere, boundingBox) && *nEntitiesFound < maxEntitiesToFind) {
if(aEntities) aEntities[*nEntitiesFound] = pEntity;
++*nEntitiesFound;
@@ -1906,6 +1909,9 @@ CWorld::Process(void)
movingEnt->UpdateRwFrame();
}
} else {
//ocram_enter();
bNoMoreCollisionTorque = false;
for(CPtrNode *node = ms_listMovingEntityPtrs.first; node; node = node->next) {
CEntity *movingEnt = (CEntity *)node->item;
@@ -1936,6 +1942,9 @@ CWorld::Process(void)
if(!movingEnt->bIsInSafePosition) { movingEnt->bIsStuck = true; }
}
}
//ocram_leave();
bSecondShift = false;
for(CPtrNode *node = ms_listMovingEntityPtrs.first; node; node = node->next) {
CEntity *movingEnt = (CEntity *)node->item;

View File

@@ -15,6 +15,10 @@
#include "Debug.h"
#include "Renderer.h"
#include "VuVector.h"
#include <array>
int32 CCullZones::NumCullZones;
CCullZone CCullZones::aZones[NUMCULLZONES];
int32 CCullZones::NumAttributeZones;
@@ -1496,10 +1500,22 @@ CCullZone::TestEntityVisibilityFromCullZone(CEntity *entity, float extraDist, CE
else
boundMaxZ += extraDist;
CVector vecMin = entity->GetMatrix() * CVector(boundMinX, boundMinY, boundMinZ);
CVector vecMaxX = entity->GetMatrix() * CVector(boundMaxX, boundMinY, boundMinZ);
CVector vecMaxY = entity->GetMatrix() * CVector(boundMinX, boundMaxY, boundMinZ);
CVector vecMaxZ = entity->GetMatrix() * CVector(boundMinX, boundMinY, boundMaxZ);
std::array<CVector, 4> inVecs = {
CVector(boundMinX, boundMinY, boundMinZ),
CVector(boundMaxX, boundMinY, boundMinZ),
CVector(boundMinX, boundMaxY, boundMinZ),
CVector(boundMinX, boundMinY, boundMaxZ)
};
std::array<CVector, 4> outVecs;
auto &[vecMin, vecMaxX, vecMaxY, vecMaxZ] = outVecs;
TransformPoints(reinterpret_cast<CVuVector*>(&outVecs),
4,
entity->GetMatrix(),
inVecs.data(),
sizeof(CVector));
CVector dirx = vecMaxX - vecMin;
CVector diry = vecMaxY - vecMin;
CVector dirz = vecMaxZ - vecMin;
@@ -1520,22 +1536,23 @@ CCullZone::TestEntityVisibilityFromCullZone(CEntity *entity, float extraDist, CE
float distToZone = CalcDistToCullZone(entity->GetPosition().x, entity->GetPosition().y)/15.0f;
distToZone = Max(distToZone, 7.0f);
int numX = (boundMaxX - boundMinX)/distToZone + 2.0f;
int numY = (boundMaxY - boundMinY)/distToZone + 2.0f;
int numZ = (boundMaxZ - boundMinZ)/distToZone + 2.0f;
float invDistToZone = Invert<true, false>(distToZone);
int numX = (boundMaxX - boundMinX)*invDistToZone + 2.0f;
int numY = (boundMaxY - boundMinY)*invDistToZone + 2.0f;
int numZ = (boundMaxZ - boundMinZ)*invDistToZone + 2.0f;
float stepX = 1.0f/(numX-1);
float stepY = 1.0f/(numY-1);
float stepZ = 1.0f/(numZ-1);
float stepX = Invert<true, false>(numX-1);
float stepY = Invert<true, false>(numY-1);
float stepZ = Invert<true, false>(numZ-1);
float midX = (boundMaxX + boundMinX)/2.0f;
float midY = (boundMaxY + boundMinY)/2.0f;
float midZ = (boundMaxZ + boundMinZ)/2.0f;
// check both xy planes
CVector mid = entity->GetMatrix() * CVector(midX, midY, midZ);
mid.z += 0.1f;
for(int i = 0; i < NumTestPoints; i++){
CVector testPoint = aTestPoints[i];
CVector mid = entity->GetMatrix() * CVector(midX, midY, midZ);
mid.z += 0.1f;
if(DoThoroughLineTest(testPoint, mid, entity))
return true;

View File

@@ -82,9 +82,6 @@
#define rwVENDORID_ROCKSTAR 0x0253F2
__always_inline auto Max(auto a, auto b) { return ((a > b)? a : b); }
__always_inline auto Min(auto a, auto b) { return ((a < b)? a : b); }
// Use this to add const that wasn't there in the original code
#define Const const
@@ -299,15 +296,6 @@ extern int strcasecmp(const char *str1, const char *str2);
extern wchar *AllocUnicode(const char*src);
template<typename T>
__always_inline T Clamp(T v, auto low, auto high) {
return std::clamp(v, static_cast<T>(low), static_cast<T>(high));
}
__always_inline auto Clamp2(auto v, auto center, auto radius) {
return (v > center) ? Min(v, center + radius) : Max(v, center - radius);
}
#define SQR(x) ((x) * (x))
__always_inline auto sq(auto x) { return SQR(x); }
@@ -418,15 +406,8 @@ template<int s, int t> struct check_size {
#endif
#define BIT(num) (1<<(num))
#define ABS(a) std::abs(a)
#define ABS(a) Abs(a)
__always_inline auto norm(auto value, auto min, auto max) {
return (Clamp(value, min, max) - min) / (max - min);
}
// we use std::lerp now
//#define lerp(norm, min, max) ( (norm) * ((max) - (min)) + (min) )
#define STRINGIFY(x) #x
#define STR(x) STRINGIFY(x)
#define CONCAT_(x,y) x##y
#define CONCAT(x,y) CONCAT_(x,y)

View File

@@ -436,7 +436,7 @@ CPhysical::ApplyTurnSpeed(void)
void
CPhysical::ApplyMoveForce(float jx, float jy, float jz)
{
m_vecMoveSpeed += CVector(jx, jy, jz)*(1.0f/m_fMass);
m_vecMoveSpeed += CVector(jx, jy, jz)*Invert<true, false>(m_fMass);
}
void
@@ -444,13 +444,13 @@ CPhysical::ApplyTurnForce(float jx, float jy, float jz, float px, float py, floa
{
CVector com = Multiply3x3(GetMatrix(), m_vecCentreOfMass);
CVector turnimpulse = CrossProduct(CVector(px, py, pz)-com, CVector(jx, jy, jz));
m_vecTurnSpeed += turnimpulse*(1.0f/m_fTurnMass);
m_vecTurnSpeed += turnimpulse*Invert<true, false>(m_fTurnMass);
}
void
CPhysical::ApplyFrictionMoveForce(float jx, float jy, float jz)
{
m_vecMoveFriction += CVector(jx, jy, jz)*(1.0f/m_fMass);
m_vecMoveFriction += CVector(jx, jy, jz)*Invert<true, false>(m_fMass);
}
void
@@ -458,7 +458,7 @@ CPhysical::ApplyFrictionTurnForce(float jx, float jy, float jz, float px, float
{
CVector com = Multiply3x3(GetMatrix(), m_vecCentreOfMass);
CVector turnimpulse = CrossProduct(CVector(px, py, pz)-com, CVector(jx, jy, jz));
m_vecTurnFriction += turnimpulse*(1.0f/m_fTurnMass);
m_vecTurnFriction += turnimpulse*Invert<true, false>(m_fTurnMass);
}
bool
@@ -488,11 +488,11 @@ CPhysical::ApplySpringDampening(float damping, CVector &springDir, CVector &poin
float impulse = -damping * (speedA + speedB)/2.0f * m_fMass * step * 0.53f;
// what is this?
float a = m_fTurnMass / ((point.MagnitudeSqr() + 1.0f) * 2.0f * m_fMass);
float a = Div<true, false>(m_fTurnMass, ((point.MagnitudeSqr() + 1.0f) * 2.0f * m_fMass));
a = Min(a, 1.0f);
float b = Abs(impulse / (speedB * m_fMass));
float b = Abs(Div<true, false>(impulse, (speedB * m_fMass)));
if(a < b)
impulse *= a/b;
impulse *= Div<true, false>(a, b);
ApplyMoveForce(springDir*impulse);
ApplyTurnForce(springDir*impulse, point);
@@ -523,7 +523,7 @@ CPhysical::ApplyAirResistance(void)
m_vecMoveSpeed *= f;
m_vecTurnSpeed *= f;
}else{
float f = Pow(1.0f/Abs(m_fAirResistance*0.5f*m_vecMoveSpeed.MagnitudeSqr() + 1.0f), CTimer::GetTimeStep());
float f = Pow(Invert<false, true>(Abs(m_fAirResistance*0.5f*m_vecMoveSpeed.MagnitudeSqr() + 1.0f)), CTimer::GetTimeStep());
m_vecMoveSpeed *= f;
m_vecTurnSpeed *= 0.99f;
}
@@ -551,7 +551,7 @@ CPhysical::ApplyCollision(CPhysical *B, CColPoint &colpoint, float &impulseA, fl
if(A->bPedPhysics){
if(A->IsPed() && ((CPed*)A)->IsPlayer() && B->IsVehicle() &&
(B->GetStatus() == STATUS_ABANDONED || B->GetStatus() == STATUS_WRECKED || A->bHasHitWall))
massFactorB = 2200.0f / B->m_fMass;
massFactorB = Div<true, false>(2200.0f, B->m_fMass);
else
massFactorB = 10.0f;
@@ -678,7 +678,7 @@ CPhysical::ApplyCollision(CPhysical *B, CColPoint &colpoint, float &impulseA, fl
float mA = A->m_fMass*massFactorA;
float mB = B->GetMassTweak(pointposB, colpoint.normal, massFactorB);
float speedSum = (mB*speedB + mA*speedA)/(mA + mB);
float speedSum = Div<true, false>((mB*speedB + mA*speedA), (mA + mB));
if(speedA < speedSum){
if(A->bHasHitWall)
eA = speedSum;
@@ -690,8 +690,8 @@ CPhysical::ApplyCollision(CPhysical *B, CColPoint &colpoint, float &impulseA, fl
eB = speedSum - (speedB - speedSum) * (A->m_fElasticity+B->m_fElasticity)/2.0f;
impulseA = (eA - speedA) * mA;
impulseB = -(eB - speedB) * mB;
CVector fA = colpoint.normal*(impulseA/massFactorA);
CVector fB = colpoint.normal*(-impulseB/massFactorB);
CVector fA = colpoint.normal*(Div<true, false>(impulseA, massFactorA));
CVector fB = colpoint.normal* -Div<true, false>(impulseB, massFactorB);
if(!A->bInfiniteMass){
if(fA.z < 0.0f) fA.z = 0.0f;
if(ispedcontactB){
@@ -713,7 +713,7 @@ CPhysical::ApplyCollision(CPhysical *B, CColPoint &colpoint, float &impulseA, fl
float mA = A->GetMassTweak(pointposA, colpoint.normal, massFactorA);
float mB = B->m_fMass*massFactorB;
float speedSum = (mB*speedB + mA*speedA)/(mA + mB);
float speedSum = Div<true, false>((mB*speedB + mA*speedA), (mA + mB));
if(speedA < speedSum){
if(A->bHasHitWall)
eA = speedSum;
@@ -725,8 +725,8 @@ CPhysical::ApplyCollision(CPhysical *B, CColPoint &colpoint, float &impulseA, fl
eB = speedSum - (speedB - speedSum) * (A->m_fElasticity+B->m_fElasticity)/2.0f;
impulseA = (eA - speedA) * mA;
impulseB = -(eB - speedB) * mB;
CVector fA = colpoint.normal*(impulseA/massFactorA);
CVector fB = colpoint.normal*(-impulseB/massFactorB);
CVector fA = colpoint.normal*(Div<true, false>(impulseA, massFactorA));
CVector fB = colpoint.normal* -Div<true, false>(impulseB, massFactorB);
if(!A->bInfiniteMass && !ispedcontactA){
if(fA.z < 0.0f) fA.z = 0.0f;
A->ApplyMoveForce(fA);
@@ -753,7 +753,7 @@ CPhysical::ApplyCollision(CPhysical *B, CColPoint &colpoint, float &impulseA, fl
speedB = DotProduct(B->GetSpeed(pointposB), colpoint.normal);
float mA = A->GetMassTweak(pointposA, colpoint.normal, massFactorA);
float mB = B->GetMassTweak(pointposB, colpoint.normal, massFactorB);
float speedSum = (mB*speedB + mA*speedA)/(mA + mB);
float speedSum = Div<true, false>((mB*speedB + mA*speedA), (mA + mB));
if(speedA < speedSum){
if(A->bHasHitWall)
eA = speedSum;
@@ -765,8 +765,8 @@ CPhysical::ApplyCollision(CPhysical *B, CColPoint &colpoint, float &impulseA, fl
eB = speedSum - (speedB - speedSum) * (A->m_fElasticity+B->m_fElasticity)/2.0f;
impulseA = (eA - speedA) * mA;
impulseB = -(eB - speedB) * mB;
CVector fA = colpoint.normal*(impulseA/massFactorA);
CVector fB = colpoint.normal*(-impulseB/massFactorB);
CVector fA = colpoint.normal*(Div<true, false>(impulseA, massFactorA));
CVector fB = colpoint.normal* -Div<true, false>(impulseB, massFactorB);
if(A->IsVehicle() && !A->bHasHitWall){
fA.x *= 1.4f;
fA.y *= 1.4f;
@@ -852,14 +852,14 @@ CPhysical::ApplyCollisionAlt(CEntity *B, CColPoint &colpoint, float &impulse, CV
if(IsVehicle() &&
(!bHasHitWall ||
!(m_vecMoveSpeed.MagnitudeSqr() > 0.1 || !(B->IsBuilding() || ((CPhysical*)B)->bInfiniteMass))))
moveSpeed += vImpulse * 1.2f * (1.0f/m_fMass);
moveSpeed += vImpulse * 1.2f * Invert<true, false>(m_fMass);
else
moveSpeed += vImpulse * (1.0f/m_fMass);
moveSpeed += vImpulse * Invert<true, false>(m_fMass);
// ApplyTurnForce
CVector com = Multiply3x3(GetMatrix(), m_vecCentreOfMass);
CVector turnimpulse = CrossProduct(pointpos-com, vImpulse);
turnSpeed += turnimpulse*(1.0f/m_fTurnMass);
turnSpeed += turnimpulse*Invert<true, false>(m_fTurnMass);
return true;
}
@@ -893,10 +893,10 @@ CPhysical::ApplyFriction(CPhysical *B, float adhesiveLimit, CColPoint &colpoint)
frictionDir = vOtherSpeedA;
frictionDir.Normalise();
#else
frictionDir = vOtherSpeedA * (1.0f/fOtherSpeedA);
frictionDir = vOtherSpeedA * Invert<true, false>(fOtherSpeedA);
#endif
speedSum = (B->m_fMass*fOtherSpeedB + A->m_fMass*fOtherSpeedA)/(B->m_fMass + A->m_fMass);
speedSum = Div<true, false>(B->m_fMass*fOtherSpeedB + A->m_fMass*fOtherSpeedA, B->m_fMass + A->m_fMass);
if(fOtherSpeedA > speedSum){
impulseA = (speedSum - fOtherSpeedA) * A->m_fMass;
impulseB = (speedSum - fOtherSpeedB) * B->m_fMass;
@@ -929,10 +929,10 @@ CPhysical::ApplyFriction(CPhysical *B, float adhesiveLimit, CColPoint &colpoint)
frictionDir = vOtherSpeedA;
frictionDir.Normalise();
#else
frictionDir = vOtherSpeedA * (1.0f/fOtherSpeedA);
frictionDir = vOtherSpeedA * Invert<true, false>(fOtherSpeedA);
#endif
float massB = B->GetMass(pointposB, frictionDir);
speedSum = (massB*fOtherSpeedB + A->m_fMass*fOtherSpeedA)/(massB + A->m_fMass);
speedSum = Div<true, false>(massB*fOtherSpeedB + A->m_fMass*fOtherSpeedA, massB + A->m_fMass);
if(fOtherSpeedA > speedSum){
impulseA = (speedSum - fOtherSpeedA) * A->m_fMass;
impulseB = (speedSum - fOtherSpeedB) * massB;
@@ -962,10 +962,10 @@ CPhysical::ApplyFriction(CPhysical *B, float adhesiveLimit, CColPoint &colpoint)
frictionDir = vOtherSpeedA;
frictionDir.Normalise();
#else
frictionDir = vOtherSpeedA * (1.0f/fOtherSpeedA);
frictionDir = vOtherSpeedA * Invert<true, false>(fOtherSpeedA);
#endif
float massA = A->GetMass(pointposA, frictionDir);
speedSum = (B->m_fMass*fOtherSpeedB + massA*fOtherSpeedA)/(B->m_fMass + massA);
speedSum = Div<true, false>(B->m_fMass*fOtherSpeedB + massA*fOtherSpeedA, B->m_fMass + massA);
if(fOtherSpeedA > speedSum){
impulseA = (speedSum - fOtherSpeedA) * massA;
impulseB = (speedSum - fOtherSpeedB) * B->m_fMass;
@@ -995,11 +995,11 @@ CPhysical::ApplyFriction(CPhysical *B, float adhesiveLimit, CColPoint &colpoint)
frictionDir = vOtherSpeedA;
frictionDir.Normalise();
#else
frictionDir = vOtherSpeedA * (1.0f/fOtherSpeedA);
frictionDir = vOtherSpeedA * Invert<true, false>(fOtherSpeedA);
#endif
float massA = A->GetMass(pointposA, frictionDir);
float massB = B->GetMass(pointposB, frictionDir);
speedSum = (massB*fOtherSpeedB + massA*fOtherSpeedA)/(massB + massA);
speedSum = Div<true, false>(massB*fOtherSpeedB + massA*fOtherSpeedA, massB + massA);
if(fOtherSpeedA > speedSum){
impulseA = (speedSum - fOtherSpeedA) * massA;
impulseB = (speedSum - fOtherSpeedB) * massB;
@@ -1037,12 +1037,12 @@ CPhysical::ApplyFriction(float adhesiveLimit, CColPoint &colpoint)
frictionDir = vOtherSpeed;
frictionDir.Normalise();
#else
frictionDir = vOtherSpeed * (1.0f/fOtherSpeed);
frictionDir = vOtherSpeed * Div<true, false>(fOtherSpeed);
#endif
// not really impulse but speed
// maybe use ApplyFrictionMoveForce instead?
fImpulse = -fOtherSpeed;
impulseLimit = adhesiveLimit*CTimer::GetTimeStep() / m_fMass;
impulseLimit = Div<true, false>(adhesiveLimit*CTimer::GetTimeStep(), m_fMass);
if(fImpulse < -impulseLimit) fImpulse = -impulseLimit;
CVector vImpulse = frictionDir*fImpulse;
m_vecMoveFriction += CVector(vImpulse.x, vImpulse.y, 0.0f);
@@ -1060,7 +1060,7 @@ CPhysical::ApplyFriction(float adhesiveLimit, CColPoint &colpoint)
frictionDir = vOtherSpeed;
frictionDir.Normalise();
#else
frictionDir = vOtherSpeed * (1.0f/fOtherSpeed);
frictionDir = vOtherSpeed * Invert<true, false>(fOtherSpeed);
#endif
fImpulse = -fOtherSpeed * m_fMass;
impulseLimit = adhesiveLimit*CTimer::GetTimeStep() * 1.5;
@@ -1199,10 +1199,10 @@ CPhysical::ProcessShiftSectorList(CPtrList *lists)
if(CWorld::bSecondShift)
for(j = 0; j < numCollisions; j++)
shift += colpoints[j].GetNormal() * colpoints[j].GetDepth() * 1.5f / numCollisions;
shift += colpoints[j].GetNormal() * colpoints[j].GetDepth() * 1.5f * Invert<true, false>(numCollisions);
else
for(j = 0; j < numCollisions; j++)
shift += colpoints[j].GetNormal() * colpoints[j].GetDepth() * 1.2f / numCollisions;
shift += colpoints[j].GetNormal() * colpoints[j].GetDepth() * 1.2f * Invert<true, false>(numCollisions);
if(A->IsVehicle() && B->IsVehicle()){
CVector dir = A->GetPosition() - B->GetPosition();
@@ -1215,14 +1215,14 @@ CPhysical::ProcessShiftSectorList(CPtrList *lists)
float f = Min(Abs(dir.z), 0.9f);
dir.z = 0.0f;
dir.Normalise();
shift += dir * colpoints[mostColliding].GetDepth() / (1.0f - f);
shift += dir * colpoints[mostColliding].GetDepth() * Invert<true, false>(1.0f - f);
boat = B;
}else if(B->IsPed() && A->IsVehicle() && ((CVehicle*)A)->IsBoat()){
CVector dir = colpoints[mostColliding].GetNormal() * -1.0f;
float f = Min(Abs(dir.z), 0.9f);
dir.z = 0.0f;
dir.Normalise();
B->GetMatrix().Translate(dir * colpoints[mostColliding].GetDepth() / (1.0f - f));
B->GetMatrix().Translate(dir * colpoints[mostColliding].GetDepth() * Invert<true, false>(1.0f - f));
// BUG? how can that ever happen? A is a Ped
if(B->IsVehicle())
B->ProcessEntityCollision(A, colpoints);
@@ -1364,7 +1364,7 @@ collision:
DMAudio.ReportCollision(A, B, aColPoints[i].surfaceA, aColPoints[i].surfaceB, impulseA, Max(turnSpeedDiff, moveSpeedDiff));
if(A->ApplyFriction(B, CSurfaceTable::GetAdhesiveLimit(aColPoints[i])/numCollisions, aColPoints[i])){
if(A->ApplyFriction(B, Div<true, false>(CSurfaceTable::GetAdhesiveLimit(aColPoints[i]), numCollisions), aColPoints[i])){
A->bHasContacted = true;
B->bHasContacted = true;
}
@@ -1593,7 +1593,7 @@ CPhysical::ProcessCollisionSectorList(CPtrList *lists)
DMAudio.ReportCollision(A, B, aColPoints[i].surfaceA, aColPoints[i].surfaceB, imp, Max(turnSpeedDiff, moveSpeedDiff));
float adhesion = CSurfaceTable::GetAdhesiveLimit(aColPoints[i]) / numCollisions;
float adhesion = Div<true, false>(CSurfaceTable::GetAdhesiveLimit(aColPoints[i]), numCollisions);
if(A->GetModelIndex() == MI_RCBANDIT)
adhesion *= 0.2f;
@@ -1618,15 +1618,16 @@ CPhysical::ProcessCollisionSectorList(CPtrList *lists)
}
if(numResponses){
m_vecMoveSpeed += moveSpeed / numResponses;
m_vecTurnSpeed += turnSpeed / numResponses;
m_vecMoveSpeed += moveSpeed * Invert<true, false>(numResponses);
m_vecTurnSpeed += turnSpeed * Invert<true, false>(numResponses);
if(!CWorld::bNoMoreCollisionTorque &&
A->GetStatus() == STATUS_PLAYER && A->IsVehicle() &&
Abs(A->m_vecMoveSpeed.x) > 0.2f &&
Abs(A->m_vecMoveSpeed.y) > 0.2f){
A->m_vecMoveFriction.x += moveSpeed.x * -0.3f / numCollisions;
A->m_vecMoveFriction.y += moveSpeed.y * -0.3f / numCollisions;
A->m_vecTurnFriction += turnSpeed * -0.3f / numCollisions;
float invNumCollisions = Invert<true, false>(numCollisions);
A->m_vecMoveFriction.x += moveSpeed.x * -0.3f * invNumCollisions;
A->m_vecMoveFriction.y += moveSpeed.y * -0.3f * invNumCollisions;
A->m_vecTurnFriction += turnSpeed * -0.3f * invNumCollisions;
}
return true;
}
@@ -1685,7 +1686,7 @@ CPhysical::ProcessCollisionSectorList(CPtrList *lists)
DMAudio.ReportCollision(A, B, aColPoints[i].surfaceA, aColPoints[i].surfaceB, impulseA, Max(turnSpeedDiff, moveSpeedDiff));
if(A->ApplyFriction(B, CSurfaceTable::GetAdhesiveLimit(aColPoints[i])/numCollisions, aColPoints[i])){
if(A->ApplyFriction(B, Div<true, false>(CSurfaceTable::GetAdhesiveLimit(aColPoints[i]), numCollisions), aColPoints[i])){
A->bHasContacted = true;
B->bHasContacted = true;
}
@@ -1854,7 +1855,7 @@ CPhysical::ProcessShift(void)
}
// x is the number of units (m) we would like to step
#define NUMSTEPS(x) Ceil(Sqrt(distSq) * (1.0f/(x)))
#define NUMSTEPS(x) Ceil(Sqrt(distSq) * (Invert<true, false>(x)))
void
CPhysical::ProcessCollision(void)
@@ -1898,13 +1899,13 @@ CPhysical::ProcessCollision(void)
n = Max(NUMSTEPS(0.2f), 2.0f);
else
n = NUMSTEPS(0.3f);
step = savedTimeStep / n;
step = Div<true, false>(savedTimeStep, n);
}else if(IsVehicle() && distSq >= sq(0.4f)){
if(GetStatus() == STATUS_PLAYER)
n = NUMSTEPS(0.2f);
else
n = distSq > 0.32f ? NUMSTEPS(0.3f) : NUMSTEPS(0.4f);
step = savedTimeStep / n;
step = Div<true, false>(savedTimeStep, n);
}else if(IsObject()){
int responsecase = ((CObject*)this)->m_nSpecialCollisionResponseCases;
if(responsecase == COLLRESPONSE_LAMPOST){
@@ -1912,29 +1913,30 @@ CPhysical::ProcessCollision(void)
CVector speedDown = CVector(0.0f, 0.0f, 0.0f);
speedUp.z = GetBoundRadius();
speedDown.z = -speedUp.z;
speedUp = Multiply3x3(GetMatrix(), speedUp);
speedDown = Multiply3x3(GetMatrix(), speedDown);
mat_load(reinterpret_cast<matrix_t *>(&GetMatrix()));
mat_trans_normal3(speedUp.x, speedUp.y, speedUp.z);
mat_trans_normal3(speedDown.x, speedDown.y, speedDown.z); ;
speedUp = GetSpeed(speedUp);
speedDown = GetSpeed(speedDown);
distSq = Max(speedUp.MagnitudeSqr(), speedDown.MagnitudeSqr()) * sq(CTimer::GetTimeStep());
if(distSq >= sq(0.3f)){
n = NUMSTEPS(0.3f);
step = savedTimeStep / n;
step = Div<true, false>(savedTimeStep, n);
}
}else if(responsecase == COLLRESPONSE_UNKNOWN5){
if(distSq >= 0.009f){
n = NUMSTEPS(0.09f);
step = savedTimeStep / n;
step = Div<true, false>(savedTimeStep, n);
}
}else if(responsecase == COLLRESPONSE_SMALLBOX){
if(distSq >= sq(0.15f)){
n = NUMSTEPS(0.15f);
step = savedTimeStep / n;
step = Div<true, false>(savedTimeStep, n);
}
}else if(responsecase != COLLRESPONSE_FENCEPART){
if(distSq >= sq(0.3f)){
n = NUMSTEPS(0.3f);
step = savedTimeStep / n;
step = Div<true, false>(savedTimeStep, n);
}
}
}

View File

@@ -1,11 +1,5 @@
#include "common.h"
CMatrix::CMatrix(void)
{
m_attachment = nil;
m_hasRwMatrix = false;
}
CMatrix::CMatrix(CMatrix const &m)
{
m_attachment = nil;
@@ -60,20 +54,30 @@ CMatrix::Detach(void)
void
CMatrix::Update(void)
{
#ifndef DC_SH4
GetRight() = m_attachment->right;
GetForward() = m_attachment->up;
GetUp() = m_attachment->at;
GetPosition() = m_attachment->pos;
#else
mat_copy(reinterpret_cast<matrix_t*>(this),
reinterpret_cast<const matrix_t*>(m_attachment));
#endif
}
void
CMatrix::UpdateRW(void)
{
if (m_attachment) {
#ifndef DC_SH4
m_attachment->right = GetRight();
m_attachment->up = GetForward();
m_attachment->at = GetUp();
m_attachment->pos = GetPosition();
#else
mat_copy(reinterpret_cast<matrix_t*>(m_attachment),
reinterpret_cast<const matrix_t*>(this));
#endif
RwMatrixUpdate(m_attachment);
}
}
@@ -81,7 +85,8 @@ CMatrix::UpdateRW(void)
void
CMatrix::operator=(CMatrix const &rhs)
{
memcpy(this, &rhs, sizeof(f));
mat_copy(reinterpret_cast<matrix_t*>(this),
reinterpret_cast<const matrix_t*>(&rhs));
if (m_attachment)
UpdateRW();
}
@@ -89,7 +94,8 @@ CMatrix::operator=(CMatrix const &rhs)
void
CMatrix::CopyOnlyMatrix(const CMatrix &other)
{
memcpy(this, &other, sizeof(f));
mat_copy(reinterpret_cast<matrix_t*>(this),
reinterpret_cast<const matrix_t*>(&other));
}
CMatrix &
@@ -282,10 +288,33 @@ CMatrix::SetRotate(float xAngle, float yAngle, float zAngle)
void
CMatrix::RotateX(float x)
{
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
mat_load(reinterpret_cast<matrix_t *>(this));
mat_rotate_x(x);
mat_store(reinterpret_cast<matrix_t *>(this));
#if defined(DC_SH4)
x *= 10430.37835f;
mat_load(reinterpret_cast<const matrix_t*>(this));
asm volatile(
"ftrc %[a], fpul\n\t"
".short 0xf4fd\n\t" /* fsca dr4 */
"fldi0 fr8\n\t"
"fldi0 fr11\n\t"
"fmov fr5, fr10\n\t"
"fmov fr4, fr9\n\t"
"fneg fr9\n\t"
"ftrv xmtrx, fv8\n\t"
"fmov fr4, fr6\n\t"
"fldi0 fr7\n\t"
"fldi0 fr4\n\t"
"ftrv xmtrx, fv4\n\t"
"fschg\n\t"
"fmov dr8, xd8\n\t"
"fmov dr10, xd10\n\t"
"fmov dr4, xd4\n\t"
"fmov dr6, xd6\n\t"
"fschg\n"
:
: [a] "f"(x)
: "fpul", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11");
mat_store(reinterpret_cast<matrix_t*>(this));
#else
auto [s, c] = SinCos(x);
@@ -312,10 +341,33 @@ CMatrix::RotateX(float x)
void
CMatrix::RotateY(float y)
{
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
mat_load(reinterpret_cast<matrix_t *>(this));
mat_rotate_y(y);
mat_store(reinterpret_cast<matrix_t *>(this));
#if defined(DC_SH4)
y *= 10430.37835f;
mat_load(reinterpret_cast<const matrix_t*>(this));
asm volatile(
"ftrc %[a], fpul\n\t"
".short 0xf6fd\n\t" /* fsca dr6 */
"fldi0 fr9\n\t"
"fldi0 fr11\n\t"
"fmov fr6, fr8\n\t"
"fmov fr7, fr10\n\t"
"ftrv xmtrx, fv8\n\t"
"fmov fr7, fr4\n\t"
"fldi0 fr5\n\t"
"fneg fr6\n\t"
"fldi0 fr7\n\t"
"ftrv xmtrx, fv4\n\t"
"fschg\n\t"
"fmov dr8, xd8\n\t"
"fmov dr10, xd10\n\t"
"fmov dr4, xd0\n\t"
"fmov dr6, xd2\n\t"
"fschg\n"
:
: [a] "f"(y)
: "fpul", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11");
mat_store(reinterpret_cast<matrix_t*>(this));
#else
auto [s, c] = SinCos(y);
@@ -342,10 +394,32 @@ CMatrix::RotateY(float y)
void
CMatrix::RotateZ(float z)
{
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
mat_load(reinterpret_cast<matrix_t *>(this));
mat_rotate_z(z);
mat_store(reinterpret_cast<matrix_t *>(this));
#if defined(DC_SH4)
z *= 10430.37835f;
mat_load(reinterpret_cast<const matrix_t*>(this));
asm volatile(
"ftrc %[a], fpul\n\t"
".short 0xf8fd\n\t" /* fsca dr8 */
"fldi0 fr10\n\t"
"fldi0 fr11\n\t"
"fmov fr8, fr5\n\t"
"fneg fr8\n\t"
"ftrv xmtrx, fv8\n\t"
"fmov fr9, fr4\n\t"
"fschg\n\t"
"fmov dr10, dr6\n\t"
"ftrv xmtrx, fv4\n\t"
"fmov dr8, xd4\n\t"
"fmov dr10, xd6\n\t"
"fmov dr4, xd0\n\t"
"fmov dr6, xd2\n\t"
"fschg\n"
:
: [a] "f"(z)
: "fpul", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11");
mat_store(reinterpret_cast<matrix_t*>(this));
#else
auto [s, c] = SinCos(z);
@@ -372,15 +446,22 @@ CMatrix::RotateZ(float z)
void
CMatrix::Rotate(float x, float y, float z)
{
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
mat_load(reinterpret_cast<matrix_t *>(this));
mat_rotate(x, y, z);
#if defined(DC_SH4)
CMatrix rot;
rot.SetRotate(x, y, z);
mat_load_apply(reinterpret_cast<matrix_t *>(this),
reinterpret_cast<matrix_t *>(&rot));
mat_store(reinterpret_cast<matrix_t *>(this));
#else
auto [sX, cX] = SinCos(x);
auto [sY, cY] = SinCos(y);
#ifndef DC_SH4
auto [sZ, cZ] = SinCos(z);
#else /* Protecting against another goddamn fast-math ICE here! */
float sZ, cZ;
fsincosr(z, &sZ, &cZ);
#endif
float rx = this->rx;
float ry = this->ry;
float rz = this->rz;
@@ -455,70 +536,15 @@ CMatrix::Reorthogonalise(void)
f = CrossProduct(u, r);
}
#ifdef DC_SH4
static __always_inline void MATH_Load_Matrix_Product(const matrix_t* matrix1, const matrix_t* matrix2)
{
unsigned int prefetch_scratch;
asm volatile (
"mov %[bmtrx], %[pref_scratch]\n\t" // (MT)
"add #32, %[pref_scratch]\n\t" // offset by 32 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
"fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) (FE)
"pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
// back matrix
"fmov.d @%[bmtrx]+, XD0\n\t" // (LS)
"fmov.d @%[bmtrx]+, XD2\n\t"
"fmov.d @%[bmtrx]+, XD4\n\t"
"fmov.d @%[bmtrx]+, XD6\n\t"
"pref @%[fmtrx]\n\t" // prefetch fmtrx now while we wait (LS)
"fmov.d @%[bmtrx]+, XD8\n\t" // bmtrx prefetch should work for here
"fmov.d @%[bmtrx]+, XD10\n\t"
"fmov.d @%[bmtrx]+, XD12\n\t"
"mov %[fmtrx], %[pref_scratch]\n\t" // (MT)
"add #32, %[pref_scratch]\n\t" // store offset by 32 in r0 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
"fmov.d @%[bmtrx], XD14\n\t"
"pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
// front matrix
// interleave loads and matrix multiply 4x4
"fmov.d @%[fmtrx]+, DR0\n\t"
"fmov.d @%[fmtrx]+, DR2\n\t"
"fmov.d @%[fmtrx]+, DR4\n\t" // (LS) want to issue the next one before 'ftrv' for parallel exec
"ftrv XMTRX, FV0\n\t" // (FE)
"fmov.d @%[fmtrx]+, DR6\n\t"
"fmov.d @%[fmtrx]+, DR8\n\t"
"ftrv XMTRX, FV4\n\t"
"fmov.d @%[fmtrx]+, DR10\n\t"
"fmov.d @%[fmtrx]+, DR12\n\t"
"ftrv XMTRX, FV8\n\t"
"fmov.d @%[fmtrx], DR14\n\t" // (LS, but this will stall 'ftrv' for 3 cycles)
"fschg\n\t" // switch back to single moves (and avoid stalling 'ftrv') (FE)
"ftrv XMTRX, FV12\n\t" // (FE)
// Save output in XF regs
"frchg\n"
: [bmtrx] "+&r" ((unsigned int)matrix1), [fmtrx] "+r" ((unsigned int)matrix2), [pref_scratch] "=&r" (prefetch_scratch) // outputs, "+" means r/w, "&" means it's written to before all inputs are consumed
: // no inputs
: "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15" // clobbers (GCC doesn't know about back bank, so writing to it isn't clobbered)
);
}
#endif
CMatrix
operator*(const CMatrix &m1, const CMatrix &m2)
{
// TODO: VU0 code
CMatrix out;
#if defined(RW_DC) && 0 // THIS IS BROKEN, 4th element shouldn't be processed
# ifdef DC_SH4
MATH_Load_Matrix_Product(reinterpret_cast<const matrix_t *>(&m1), reinterpret_cast<const matrix_t *>(&m2));
# elif defined(RW_DC)
mat_load(reinterpret_cast<const matrix_t *>(&m2));
mat_apply(reinterpret_cast<const matrix_t *>(&m1));
# endif
mat_store(reinterpret_cast<matrix_t *>(&out));
#ifdef RW_DC
mat_mult(reinterpret_cast<matrix_t *>(&out),
reinterpret_cast<const matrix_t *>(&m1),
reinterpret_cast<const matrix_t *>(&m2));
#else
out.rx = m1.rx * m2.rx + m1.fx * m2.ry + m1.ux * m2.rz;
out.ry = m1.ry * m2.rx + m1.fy * m2.ry + m1.uy * m2.rz;

View File

@@ -1,24 +1,25 @@
#pragma once
class CMatrix
class alignas(8) CMatrix
{
public:
union
{
alignas(8) float f[4][4];
alignas(8) float m[16];
struct alignas(8)
{
float rx, ry, rz, rw;
float fx, fy, fz, fw;
float ux, uy, uz, uw;
float px, py, pz, pw;
float rx, ry, rz, rw=0.0f;
float fx, fy, fz, fw=0.0f;
float ux, uy, uz, uw=0.0f;
float px, py, pz, pw=1.0f;
};
};
RwMatrix *m_attachment;
bool m_hasRwMatrix; // are we the owner?
RwMatrix *m_attachment = nil;
bool m_hasRwMatrix = false; // are we the owner?
CMatrix(void);
CMatrix(void) = default;
CMatrix(CMatrix const &m);
CMatrix(RwMatrix *matrix, bool owner = false);
CMatrix(float scale){

View File

@@ -39,10 +39,9 @@ CQuaternion::Slerp(const CQuaternion &q1, const CQuaternion &q2, float theta, fl
void
CQuaternion::Multiply(const CQuaternion &q1, const CQuaternion &q2)
{
x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
quat_mult(reinterpret_cast<quaternion_t *>(this),
reinterpret_cast<const quaternion_t &>(q1),
reinterpret_cast<const quaternion_t &>(q2));
}
void
@@ -50,10 +49,11 @@ CQuaternion::Get(RwV3d *axis, float *angle)
{
*angle = Acos(w);
float s = Sin(*angle);
float invS = dc::Invert<true, false>(s);
axis->x = x * (1.0f / s);
axis->y = y * (1.0f / s);
axis->z = z * (1.0f / s);
axis->x = x * invS;
axis->y = y * invS;
axis->z = z * invS;
}
void
@@ -104,7 +104,7 @@ CQuaternion::Set(const RwMatrix &matrix)
if (f >= 0.0f) {
s = Sqrt(f + 1.0f);
w = 0.5f * s;
m = 0.5f / s;
m = Div<true, false>(0.5f, s);
x = (matrix.up.z - matrix.at.y) * m;
y = (matrix.at.x - matrix.right.z) * m;
z = (matrix.right.y - matrix.up.x) * m;
@@ -115,7 +115,7 @@ CQuaternion::Set(const RwMatrix &matrix)
if (f >= 0.0f) {
s = Sqrt(f + 1.0f);
x = 0.5f * s;
m = 0.5f / s;
m = Div<true, false>(0.5f, s);
y = (matrix.up.x + matrix.right.y) * m;
z = (matrix.at.x + matrix.right.z) * m;
w = (matrix.up.z - matrix.at.y) * m;
@@ -126,7 +126,7 @@ CQuaternion::Set(const RwMatrix &matrix)
if (f >= 0.0f) {
s = Sqrt(f + 1.0f);
y = 0.5f * s;
m = 0.5f / s;
m = Div<true, false>(0.5f, s);
w = (matrix.at.x - matrix.right.z) * m;
x = (matrix.up.x - matrix.right.y) * m;
z = (matrix.at.y + matrix.up.z) * m;
@@ -136,7 +136,7 @@ CQuaternion::Set(const RwMatrix &matrix)
f = matrix.at.z - (matrix.up.y + matrix.right.x);
s = Sqrt(f + 1.0f);
z = 0.5f * s;
m = 0.5f / s;
m = Div<true, false>(0.5f, s);
w = (matrix.right.y - matrix.up.x) * m;
x = (matrix.at.x + matrix.right.z) * m;
y = (matrix.at.y + matrix.up.z) * m;
@@ -151,8 +151,7 @@ CQuaternion::Get(float *f1, float *f2, float *f3)
*f3 = Atan2(matrix.right.y, matrix.up.y);
if (*f3 < 0.0f)
*f3 += TWOPI;
float s = Sin(*f3);
float c = Cos(*f3);
auto [s, c] = SinCos(*f3);
*f1 = Atan2(-matrix.at.y, s * matrix.right.y + c * matrix.up.y);
if (*f1 < 0.0f)
*f1 += TWOPI;
@@ -164,12 +163,10 @@ CQuaternion::Get(float *f1, float *f2, float *f3)
void
CQuaternion::Set(float f1, float f2, float f3)
{
float c1 = Cos(f1 * 0.5f);
float c2 = Cos(f2 * 0.5f);
float c3 = Cos(f3 * 0.5f);
float s1 = Sin(f1 * 0.5f);
float s2 = Sin(f2 * 0.5f);
float s3 = Sin(f3 * 0.5f);
auto [s1, c1] = SinCos(f1 * 0.5f);
auto [s2, c2] = SinCos(f2 * 0.5f);
auto [s3, c3] = SinCos(f3 * 0.5f);
x = ((c2 * c1) * s3) - ((s2 * s1) * c3);
y = ((s1 * c2) * c3) + ((s2 * c1) * s3);
z = ((s2 * c1) * c3) - ((s1 * c2) * s3);

View File

@@ -1,11 +1,11 @@
#pragma once
// TODO: actually implement this
class CQuaternion
class alignas(8) CQuaternion
{
public:
float x, y, z, w;
CQuaternion(void) {}
CQuaternion(void) = default;
CQuaternion(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
float Magnitude(void) const { return Sqrt(MagnitudeSqr()); }
@@ -49,10 +49,11 @@ public:
}
const CQuaternion &operator/=(float right) {
x /= right;
y /= right;
z /= right;
w /= right;
float inv = dc::Invert(right);
x *= inv;
y *= inv;
z *= inv;
w *= inv;
return *this;
}
@@ -72,7 +73,11 @@ public:
inline float
DotProduct(const CQuaternion &q1, const CQuaternion &q2)
{
#ifndef DC_SH4
return q1.x*q2.x + q1.y*q2.y + q1.z*q2.z + q1.w*q2.w;
#else
return fipr(q1.x, q1.y, q1.z, q1.w, q2.x, q2.y, q2.z, q2.w);
#endif
}
inline CQuaternion operator+(const CQuaternion &left, const CQuaternion &right)
@@ -97,5 +102,6 @@ inline CQuaternion operator*(float left, const CQuaternion &right)
inline CQuaternion operator/(const CQuaternion &left, float right)
{
return CQuaternion(left.x / right, left.y / right, left.z / right, left.w / right);
right = dc::Invert(right);
return CQuaternion(left.x * right, left.y * right, left.z * right, left.w * right);
}

View File

@@ -28,17 +28,11 @@ CVector
Multiply3x3(const CMatrix &mat, const CVector &vec)
{
#ifdef DC_SH4
register float __x __asm__("fr12") = vec.x;
register float __y __asm__("fr13") = vec.y;
register float __z __asm__("fr14") = vec.z;
register float __w __asm__("fr15") = 0.0f;
CVector out;
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
asm volatile( "ftrv xmtrx, fv12\n"
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w)
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) );
return { __x, __y, __z };
mat_trans_normal3_nomod(vec.x, vec.y, vec.z,
out.x, out.y, out.z);
return out;
#else
// TODO: VU0 code
return CVector(mat.rx * vec.x + mat.fx * vec.y + mat.ux * vec.z,
@@ -50,9 +44,12 @@ Multiply3x3(const CMatrix &mat, const CVector &vec)
CVector
Multiply3x3(const CVector &vec, const CMatrix &mat)
{
return CVector(mat.rx * vec.x + mat.ry * vec.y + mat.rz * vec.z,
mat.fx * vec.x + mat.fy * vec.y + mat.fz * vec.z,
mat.ux * vec.x + mat.uy * vec.y + mat.uz * vec.z);
CVector out;
mat_load(reinterpret_cast<const matrix_t*>(&mat));
mat_transpose();
mat_trans_vec3_nomod(vec.x, vec.y, vec.z,
out.x, out.y, out.z);
return out;
}
CVector

View File

@@ -3,7 +3,7 @@
class CVector : public RwV3d
{
public:
CVector(void){}
CVector(void) = default;
CVector(float x, float y, float z)
{
this->x = x;
@@ -20,7 +20,7 @@ public:
// (0,1,0) means no rotation. So get right vector and its atan
__always_inline float Heading(void) const { return Atan2(-x, y); }
__always_inline float Magnitude(void) const {
#ifdef DC_SH4
#if defined(DC_SH4) && 0 /* Probably not a gain! */
float w;
vec3f_length(x, y, z, w);
return w;
@@ -29,7 +29,7 @@ public:
#endif
}
__always_inline float MagnitudeSqr(void) const {
#ifdef DC_SH4
#if defined(DC_SH4) && 0 /* Not a gain, shitty FIPR codegen! */
return fipr_magnitude_sqr(x, y,z, 0.0f);
#else
return x*x + y*y + z*z;
@@ -112,13 +112,14 @@ inline CVector operator*(float left, const CVector &right)
inline CVector operator/(const CVector &left, float right)
{
return CVector(left.x / right, left.y / right, left.z / right);
right = Invert(right);
return CVector(left.x * right, left.y * right, left.z * right);
}
__always_inline float
DotProduct(const CVector &v1, const CVector &v2)
{
#ifdef DC_SH4
#if defined(DC_SH4) && 0 /* Not a gain, FIPR sucks ass with codegen! */
return fipr(v1.x, v1.y, v1.z, 0.0f, v2.x, v2.y, v2.z, 0.0f);
#else
return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z;
@@ -131,7 +132,7 @@ inline float
Distance(const CVector &v1, const CVector &v2)
{
float w;
#ifdef DC_SH4
#if defined(DC_SH4) && 0 /* Probably not a gain! */
vec3f_distance(v1.x, v1.y, v1.z, v2.x, v2.y, v2.z, w);
return w;
#else

View File

@@ -80,7 +80,6 @@ __always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const Rw
__always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride)
{
assert(false);
#ifdef GTA_PS2
__asm__ __volatile__("\n\
paddub $3,%4,$0\n\
@@ -110,9 +109,11 @@ __always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat,
": : "r" (out) , "r" (n), "r" (&mat), "r" (in), "r" (stride): "memory");
#elif defined(DC_SH4)
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
mat_transform(reinterpret_cast<vector_t *>(const_cast<RwV3d *>(in)),
reinterpret_cast<vector_t *>(out),
n, stride - sizeof(vector_t));
while(n--) {
mat_trans_single3_nodiv_nomod(in->x, in->y, in->z, out->x, out->y, out->z);
in = reinterpret_cast<const RwV3d *>(reinterpret_cast<const uint8_t *>(in) + stride);
++out;
}
#else
while(n--){
*out = mat * *in;

View File

@@ -1,73 +1,6 @@
#pragma once
#include "src/common_defines.h"
#include "rwdc_common.h"
#include <tuple>
#include <dc/matrix.h>
#ifdef DC_SH4
#define mat_trans_nodiv_nomod(x, y, z, x2, y2, z2, w2) do { \
register float __x __asm__("fr12") = (x); \
register float __y __asm__("fr13") = (y); \
register float __z __asm__("fr14") = (z); \
register float __w __asm__("fr15") = 1.0f; \
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
} while(false)
#define mat_trans_w_nodiv_nomod(x, y, z, w) do { \
register float __x __asm__("fr12") = (x); \
register float __y __asm__("fr13") = (y); \
register float __z __asm__("fr14") = (z); \
register float __w __asm__("fr15") = 1.0f; \
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
w = __w; \
} while(false)
__always_inline float Fmac(float a, float b, float c) {
asm volatile ("fmac fr0, %[floatb], %[floatc]\n"
: [floatc] "+f" (c) : "w" (a), [floatb] "f" (b) : );
return c;
}
#else
#define mat_trans_nodiv_nomod(x_, y_, z_, x2, y2, z2, w2) do { \
vector_t tmp = { x_, y_, z_, 1.0f }; \
mat_transform(&tmp, &tmp, 1, 0); \
x2 = tmp.x; y2 = tmp.y; z2 = tmp.z; w2 = tmp.w; \
} while(false)
#define mat_trans_w_nodiv_nomod(x_, y_, z_, w_) do { \
vector_t tmp = { x_, y_, z_, 1.0f }; \
mat_transform(&tmp, &tmp, 1, 0); \
w_ = tmp.w; \
} while(false)
__always_inline float Fmac(float a, float b, float c) { return a * b + c; }
#endif
__always_inline float Sin(float x) { return __builtin_sinf(x); }
__always_inline float Cos(float x) { return __builtin_cosf(x); }
__always_inline auto SinCos(float x) { return std::pair { Sin(x), Cos(x) }; }
__always_inline float Tan(float x) { return __builtin_tanf(x); }
__always_inline float Abs(float x) { return __builtin_fabsf(x); }
__always_inline float Sqrt(float x) { return __builtin_sqrtf(x); }
__always_inline float RecipSqrt(float x) { return 1.0f / __builtin_sqrtf(x); }
__always_inline float Asin(float x) { return __builtin_asinf(x); }
__always_inline float Acos(float x) { return __builtin_acosf(x); }
__always_inline float Atan(float x) { return __builtin_atanf(x); }
__always_inline float Atan2(float y, float x) { return __builtin_atan2f(y, x); }
__always_inline float RecipSqrt(float x, float y) { return x / __builtin_sqrtf(y); /*y = RecipSqrt(y); return x * y * y;*/ }
__always_inline float Pow(float x, float y) { return __builtin_powf(x, y); }
__always_inline float Floor(float x) { return __builtin_floorf(x); }
__always_inline float Ceil(float x) { return __builtin_ceilf(x); }
__always_inline float Invert(float x) { return (((x) < 0.0f)? -1.0f : 1.0f) * RecipSqrt((x) * (x)); }
__always_inline float Div(float x, float y) { return x * Invert(y); }
__always_inline float Lerp(float a, float b, float t) { return Fmac(t, (b - a), a); }
using namespace dc;

View File

@@ -482,12 +482,12 @@ CCoronas::RenderReflections(void)
float fadeDistance = drawDist / 2.0f;
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - (spriteCoors.z - fadeDistance)/fadeDistance;
distanceFade = Clamp(distanceFade, 0.0f, 1.0f);
float recipz = 1.0f/RwCameraGetNearClipPlane(Scene.camera);
float recipz = Invert<true, false>(RwCameraGetNearClipPlane(Scene.camera));
float heightFade = (20.0f - aCoronas[i].heightAboveRoad)/20.0f;
int intensity = distanceFade*heightFade * 230.0 * CWeather::WetRoads;
CSprite::RenderBufferedOneXLUSprite(
#ifdef FIX_BUGS
#if defined(FIX_BUGS) && 1
spriteCoors.x, spriteCoors.y, spriteCoors.z,
#else
spriteCoors.x, spriteCoors.y, RwIm2DGetNearScreenZ(),
@@ -608,13 +608,16 @@ CEntity::ProcessLightsForEntity(void)
flashTimer3 = 0;
n = CModelInfo::GetModelInfo(GetModelIndex())->GetNum2dEffects();
mat_load(reinterpret_cast<const matrix_t*>(&GetMatrix()));
for(i = 0; i < n; i++, flashTimer1 += 0x80, flashTimer2 += 0x100, flashTimer3 += 0x200){
effect = CModelInfo::GetModelInfo(GetModelIndex())->Get2dEffect(i);
if(effect->type != EFFECT_LIGHT)
continue;
pos = GetMatrix() * effect->pos;
//pos = GetMatrix() * effect->pos;
mat_trans_single3_nodiv_nomod(effect->pos.x, effect->pos.y, effect->pos.z,
pos.x, pos.y, pos.z);
lightOn = false;
lightFlickering = false;

View File

@@ -1,14 +1,14 @@
#pragma once
struct CColLine
struct alignas(8) CColLine
{
// NB: this has to be compatible with two CVuVectors
CVector p0;
int pad0;
CVector p1;
int pad1;
alignas(8) CVector p0;
float pad0 = 0.0f;
alignas(8) CVector p1;
float pad1 = 0.0f;
CColLine(void) { };
CColLine(void) = default;
CColLine(const CVector &p0, const CVector &p1) { this->p0 = p0; this->p1 = p1; };
void Set(const CVector &p0, const CVector &p1);
};

View File

@@ -1,12 +1,12 @@
#pragma once
struct CColPoint
struct alignas(8) CColPoint
{
CVector point;
int pad1;
alignas(8) CVector point;
float pad1 = 0.0f;
// the surface normal on the surface of point
CVector normal;
int pad2;
alignas(8) CVector normal;
float pad2 = 0.0f;
uint8 surfaceA;
uint8 pieceA;
uint8 surfaceB;

View File

@@ -617,12 +617,13 @@ bool
CCollision::ProcessSphereSphere(const CColSphere &s1, const CColSphere &s2, CColPoint &point, float &mindistsq)
{
CVector dist = s1.center - s2.center;
float d = dist.Magnitude() - s2.radius; // distance from s1's center to s2
float mag = dist.Magnitude();
float d = mag - s2.radius; // distance from s1's center to s2
float depth = s1.radius - d; // sphere overlap
if(d < 0.0f) d = 0.0f; // clamp to zero, i.e. if s1's center is inside s2
// no collision if sphere is not close enough
if(d*d < mindistsq && d < s1.radius){
dist.Normalise();
dist *= Invert<true, false>(mag);
point.point = s1.center - dist*d;
point.normal = dist;
#ifndef VU_COLLISION

View File

@@ -39,10 +39,9 @@ CQuaternion::Slerp(const CQuaternion &q1, const CQuaternion &q2, float theta, fl
void
CQuaternion::Multiply(const CQuaternion &q1, const CQuaternion &q2)
{
x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
quat_mult(reinterpret_cast<quaternion_t *>(this),
reinterpret_cast<const quaternion_t &>(q1),
reinterpret_cast<const quaternion_t &>(q2));
}
void
@@ -50,10 +49,11 @@ CQuaternion::Get(RwV3d *axis, float *angle)
{
*angle = Acos(w);
float s = Sin(*angle);
float invS = dc::Invert<true, false>(s);
axis->x = x * (1.0f / s);
axis->y = y * (1.0f / s);
axis->z = z * (1.0f / s);
axis->x = x * invS;
axis->y = y * invS;
axis->z = z * invS;
}
void
@@ -104,7 +104,7 @@ CQuaternion::Set(const RwMatrix &matrix)
if (f >= 0.0f) {
s = Sqrt(f + 1.0f);
w = 0.5f * s;
m = 0.5f / s;
m = Div<true, false>(0.5f, s);
x = (matrix.up.z - matrix.at.y) * m;
y = (matrix.at.x - matrix.right.z) * m;
z = (matrix.right.y - matrix.up.x) * m;
@@ -115,7 +115,7 @@ CQuaternion::Set(const RwMatrix &matrix)
if (f >= 0.0f) {
s = Sqrt(f + 1.0f);
x = 0.5f * s;
m = 0.5f / s;
m = Div<true, false>(0.5f, s);
y = (matrix.up.x + matrix.right.y) * m;
z = (matrix.at.x + matrix.right.z) * m;
w = (matrix.up.z - matrix.at.y) * m;
@@ -126,7 +126,7 @@ CQuaternion::Set(const RwMatrix &matrix)
if (f >= 0.0f) {
s = Sqrt(f + 1.0f);
y = 0.5f * s;
m = 0.5f / s;
m = Div<true, false>(0.5f, s);
w = (matrix.at.x - matrix.right.z) * m;
x = (matrix.up.x - matrix.right.y) * m;
z = (matrix.at.y + matrix.up.z) * m;
@@ -136,7 +136,7 @@ CQuaternion::Set(const RwMatrix &matrix)
f = matrix.at.z - (matrix.up.y + matrix.right.x);
s = Sqrt(f + 1.0f);
z = 0.5f * s;
m = 0.5f / s;
m = Div<true, false>(0.5f, s);
w = (matrix.right.y - matrix.up.x) * m;
x = (matrix.at.x + matrix.right.z) * m;
y = (matrix.at.y + matrix.up.z) * m;
@@ -151,8 +151,7 @@ CQuaternion::Get(float *f1, float *f2, float *f3)
*f3 = Atan2(matrix.right.y, matrix.up.y);
if (*f3 < 0.0f)
*f3 += TWOPI;
float s = Sin(*f3);
float c = Cos(*f3);
auto [s, c] = SinCos(*f3);
*f1 = Atan2(-matrix.at.y, s * matrix.right.y + c * matrix.up.y);
if (*f1 < 0.0f)
*f1 += TWOPI;
@@ -164,12 +163,10 @@ CQuaternion::Get(float *f1, float *f2, float *f3)
void
CQuaternion::Set(float f1, float f2, float f3)
{
float c1 = Cos(f1 * 0.5f);
float c2 = Cos(f2 * 0.5f);
float c3 = Cos(f3 * 0.5f);
float s1 = Sin(f1 * 0.5f);
float s2 = Sin(f2 * 0.5f);
float s3 = Sin(f3 * 0.5f);
auto [s1, c1] = SinCos(f1 * 0.5f);
auto [s2, c2] = SinCos(f2 * 0.5f);
auto [s3, c3] = SinCos(f3 * 0.5f);
x = ((c2 * c1) * s3) - ((s2 * s1) * c3);
y = ((s1 * c2) * c3) + ((s2 * c1) * s3);
z = ((s2 * c1) * c3) - ((s1 * c2) * s3);

View File

@@ -21,18 +21,18 @@ matrix_t XMTRX = {
{ 0.0f, 0.0f, 0.0f, 0.0f }
};
void mat_identity() {
void (mat_identity)() {
XMTRX[0][0] = 1.0f; XMTRX[1][0] = 0.0f; XMTRX[2][0] = 0.0f; XMTRX[3][0] = 0.0f;
XMTRX[0][1] = 0.0f; XMTRX[1][1] = 1.0f; XMTRX[2][1] = 0.0f; XMTRX[3][1] = 0.0f;
XMTRX[0][2] = 0.0f; XMTRX[1][2] = 0.0f; XMTRX[2][2] = 1.0f; XMTRX[3][2] = 0.0f;
XMTRX[0][3] = 0.0f; XMTRX[1][3] = 0.0f; XMTRX[2][3] = 0.0f; XMTRX[3][3] = 1.0f;
}
void mat_load(const matrix_t *mat) {
void (mat_load)(const matrix_t *mat) {
memcpy(XMTRX, mat, sizeof(matrix_t));
}
void mat_store(matrix_t *mat) {
void (mat_store)(matrix_t *mat) {
memcpy(mat, XMTRX, sizeof(matrix_t));
}
@@ -47,13 +47,13 @@ static void mat_mult(matrix_t dst, const matrix_t &src1, const matrix_t &src2) {
}
}
void mat_apply(const matrix_t* mat) {
void (mat_apply)(const matrix_t* mat) {
matrix_t result;
mat_mult(result, *mat, XMTRX);
mat_load(&result);
}
void mat_transform(vector_t *invecs, vector_t *outvecs, int veccnt, int vecskip) {
void (mat_transform)(vector_t *invecs, vector_t *outvecs, int veccnt, int vecskip) {
for(unsigned v = 0; v < veccnt; ++v) {
auto offset = v * (sizeof(vector_t) + vecskip);
auto *in = reinterpret_cast<vector_t *>(reinterpret_cast<uint8_t *>(invecs) + offset);

View File

@@ -23,6 +23,8 @@
#include "rwengine.h"
#include "fcaseopen.h"
using namespace dc;
namespace rw {
#define PLUGIN_ID 0
@@ -45,12 +47,17 @@ int32 build = 0xFFFF;
bool32 streamAppendFrames = 0;
char *debugFile = nil;
static Matrix identMat = {
{ 1.0f, 0.0f, 0.0f }, Matrix::IDENTITY|Matrix::TYPEORTHONORMAL,
{ 0.0f, 1.0f, 0.0f }, 0,
{ 0.0f, 0.0f, 1.0f }, 0,
{ 0.0f, 0.0f, 0.0f }, 0
};
static Matrix identMat = {{
.right = { 1.0f, 0.0f, 0.0f },
.flags = Matrix::IDENTITY|Matrix::TYPEORTHONORMAL,
.pad0 = 0,
.up = { 0.0f, 1.0f, 0.0f },
.upw = 0.0f,
.at = { 0.0f, 0.0f, 1.0f },
.atw = 0.0f,
.pos = { 0.0f, 0.0f, 0.0f },
.posw = 1.0f
}};
// lazy implementation
int
@@ -86,16 +93,6 @@ strncmp_ci(const char *s1, const char *s2, int n)
return 0;
}
Quat
mult(const Quat &q, const Quat &p)
{
return makeQuat(q.w*p.w - q.x*p.x - q.y*p.y - q.z*p.z,
q.w*p.x + q.x*p.w + q.y*p.z - q.z*p.y,
q.w*p.y + q.y*p.w + q.z*p.x - q.x*p.z,
q.w*p.z + q.z*p.w + q.x*p.y - q.y*p.x);
}
Quat*
Quat::rotate(const V3d *axis, float32 angle, CombineOp op)
{
@@ -133,18 +130,21 @@ lerp(const Quat &q, const Quat &p, float32 r)
Quat
slerp(const Quat &q, const Quat &p, float32 a)
{
float32 c;
Quat q1 = q;
c = dot(q1, p);
float c = dot(q1, p);
if(c < 0.0f){
c = -c;
q1 = negate(q1);
}
float32 phi = acosf(c);
if(phi > 0.00001f){
float32 s = sinf(phi);
return add(scale(q1, sinf((1.0f-a)*phi)/s),
scale(p, sinf(a*phi)/s));
#ifndef DC_SH4
float32 s = dc::Invert(sinf(phi));
#else /* Can't use builtin here, ICEs! */
float32 s = dc::Invert(fsin(phi));
#endif
return add(scale(q1, sinf((1.0f-a)*phi)*s),
scale(p, sinf(a*phi)*s));
}
return q1;
}
@@ -165,6 +165,7 @@ void
V3d::transformPoints(V3d *out, const V3d *in, int32 n, const Matrix *m)
{
int32 i;
#ifndef DC_SH4
V3d tmp;
for(i = 0; i < n; i++){
tmp.x = in[i].x*m->right.x + in[i].y*m->up.x + in[i].z*m->at.x + m->pos.x;
@@ -172,12 +173,21 @@ V3d::transformPoints(V3d *out, const V3d *in, int32 n, const Matrix *m)
tmp.z = in[i].x*m->right.z + in[i].y*m->up.z + in[i].z*m->at.z + m->pos.z;
out[i] = tmp;
}
#else
mat_load(reinterpret_cast<const matrix_t*>(m));
//rw_mat_load_4x4(m);
for(i = 0; i < n; i++) {
mat_trans_single3_nodiv_nomod(in[i].x, in[i].y, in[i].z,
out[i].x, out[i].y, out[i].z);
}
#endif
}
void
V3d::transformVectors(V3d *out, const V3d *in, int32 n, const Matrix *m)
{
int32 i;
#ifndef DC_SH4
V3d tmp;
for(i = 0; i < n; i++){
tmp.x = in[i].x*m->right.x + in[i].y*m->up.x + in[i].z*m->at.x;
@@ -185,6 +195,14 @@ V3d::transformVectors(V3d *out, const V3d *in, int32 n, const Matrix *m)
tmp.z = in[i].x*m->right.z + in[i].y*m->up.z + in[i].z*m->at.z;
out[i] = tmp;
}
#else
//mat_load_3x3(reinterpret_cast<const matrix_t *>(m));
mat_load(reinterpret_cast<const matrix_t*>(m));
for(i = 0; i < n; i++) {
mat_trans_vec3_nomod(in[i].x, in[i].y, in[i].z,
out[i].x, out[i].y, out[i].z);
}
#endif
}
//
@@ -194,27 +212,15 @@ V3d::transformVectors(V3d *out, const V3d *in, int32 n, const Matrix *m)
void
RawMatrix::mult(RawMatrix *dst, RawMatrix *src1, RawMatrix *src2)
{
dst->right.x = src1->right.x*src2->right.x + src1->right.y*src2->up.x + src1->right.z*src2->at.x + src1->rightw*src2->pos.x;
dst->right.y = src1->right.x*src2->right.y + src1->right.y*src2->up.y + src1->right.z*src2->at.y + src1->rightw*src2->pos.y;
dst->right.z = src1->right.x*src2->right.z + src1->right.y*src2->up.z + src1->right.z*src2->at.z + src1->rightw*src2->pos.z;
dst->rightw = src1->right.x*src2->rightw + src1->right.y*src2->upw + src1->right.z*src2->atw + src1->rightw*src2->posw;
dst->up.x = src1->up.x*src2->right.x + src1->up.y*src2->up.x + src1->up.z*src2->at.x + src1->upw*src2->pos.x;
dst->up.y = src1->up.x*src2->right.y + src1->up.y*src2->up.y + src1->up.z*src2->at.y + src1->upw*src2->pos.y;
dst->up.z = src1->up.x*src2->right.z + src1->up.y*src2->up.z + src1->up.z*src2->at.z + src1->upw*src2->pos.z;
dst->upw = src1->up.x*src2->rightw + src1->up.y*src2->upw + src1->up.z*src2->atw + src1->upw*src2->posw;
dst->at.x = src1->at.x*src2->right.x + src1->at.y*src2->up.x + src1->at.z*src2->at.x + src1->atw*src2->pos.x;
dst->at.y = src1->at.x*src2->right.y + src1->at.y*src2->up.y + src1->at.z*src2->at.y + src1->atw*src2->pos.y;
dst->at.z = src1->at.x*src2->right.z + src1->at.y*src2->up.z + src1->at.z*src2->at.z + src1->atw*src2->pos.z;
dst->atw = src1->at.x*src2->rightw + src1->at.y*src2->upw + src1->at.z*src2->atw + src1->atw*src2->posw;
dst->pos.x = src1->pos.x*src2->right.x + src1->pos.y*src2->up.x + src1->pos.z*src2->at.x + src1->posw*src2->pos.x;
dst->pos.y = src1->pos.x*src2->right.y + src1->pos.y*src2->up.y + src1->pos.z*src2->at.y + src1->posw*src2->pos.y;
dst->pos.z = src1->pos.x*src2->right.z + src1->pos.y*src2->up.z + src1->pos.z*src2->at.z + src1->posw*src2->pos.z;
dst->posw = src1->pos.x*src2->rightw + src1->pos.y*src2->upw + src1->pos.z*src2->atw + src1->posw*src2->posw;
mat_mult(reinterpret_cast<matrix_t *>(dst),
reinterpret_cast<const matrix_t *>(src2),
reinterpret_cast<const matrix_t *>(src1));
}
void
RawMatrix::transpose(RawMatrix *dst, RawMatrix *src)
{
#ifndef DC_SH4
dst->right.x = src->right.x;
dst->up.x = src->right.y;
dst->at.x = src->right.z;
@@ -231,17 +237,22 @@ RawMatrix::transpose(RawMatrix *dst, RawMatrix *src)
dst->upw = src->pos.y;
dst->atw = src->pos.z;
dst->posw = src->posw;
#else
mat_load(reinterpret_cast<const matrix_t*>(src));
mat_transpose();
mat_store(reinterpret_cast<matrix_t*>(dst));
#endif
}
void
RawMatrix::setIdentity(RawMatrix *dst)
{
static RawMatrix identity = {
static RawMatrix identity = {{
{ 1.0f, 0.0f, 0.0f }, 0.0f,
{ 0.0f, 1.0f, 0.0f }, 0.0f,
{ 0.0f, 0.0f, 1.0f }, 0.0f,
{ 0.0f, 0.0f, 0.0f }, 1.0f
};
}};
*dst = identity;
}
@@ -302,8 +313,9 @@ Matrix::mult(Matrix *dst, const Matrix *src1, const Matrix *src2)
else if(src2->flags & IDENTITY)
*dst = *src1;
else{
uint8_t flags = src1->flags & src2->flags;
mult_(dst, src1, src2);
dst->flags = src1->flags & src2->flags;
dst->flags = flags;
}
return dst;
}
@@ -324,6 +336,7 @@ Matrix::invert(Matrix *dst, const Matrix *src)
Matrix*
Matrix::transpose(Matrix *dst, const Matrix *src)
{
#ifndef DC_SH4
if(src->flags & IDENTITY)
*dst = *src;
dst->right.x = src->right.x;
@@ -338,25 +351,29 @@ Matrix::transpose(Matrix *dst, const Matrix *src)
dst->pos.x = 0.0;
dst->pos.y = 0.0;
dst->pos.z = 0.0;
#else
mat_load(reinterpret_cast<const matrix_t*>(src));
mat_transpose();
mat_store(reinterpret_cast<matrix_t*>(dst));
#endif
return dst;
}
Matrix*
Matrix::rotate(const V3d *axis, float32 angle, CombineOp op)
{
Matrix tmp, rot;
makeRotation(&rot, axis, angle);
Matrix rot;
switch(op){
case COMBINEREPLACE:
*this = rot;
makeRotation(this, axis, angle);
break;
case COMBINEPRECONCAT:
mult(&tmp, &rot, this);
*this = tmp;
makeRotation(&rot, axis, angle);
mult(this, &rot, this);
break;
case COMBINEPOSTCONCAT:
mult(&tmp, this, &rot);
*this = tmp;
makeRotation(&rot, axis, angle);
mult(this, this, &rot);
break;
}
return this;
@@ -365,19 +382,18 @@ Matrix::rotate(const V3d *axis, float32 angle, CombineOp op)
Matrix*
Matrix::rotate(const Quat &q, CombineOp op)
{
Matrix tmp, rot;
makeRotation(&rot, q);
Matrix rot;
switch(op){
case COMBINEREPLACE:
*this = rot;
makeRotation(this, q);
break;
case COMBINEPRECONCAT:
mult(&tmp, &rot, this);
*this = tmp;
makeRotation(&rot, q);
mult(this, &rot, this);
break;
case COMBINEPOSTCONCAT:
mult(&tmp, this, &rot);
*this = tmp;
makeRotation(&rot, q);
mult(this, this, &rot);
break;
}
return this;
@@ -385,21 +401,24 @@ Matrix::rotate(const Quat &q, CombineOp op)
Matrix*
Matrix::translate(const V3d *translation, CombineOp op)
{
Matrix tmp;
Matrix trans = identMat;
trans.pos = *translation;
trans.flags &= ~IDENTITY;
Matrix trans;
switch(op){
case COMBINEREPLACE:
*this = trans;
*this = identMat;
this->pos = *translation;
this->flags &= ~IDENTITY;
break;
case COMBINEPRECONCAT:
mult(&tmp, &trans, this);
*this = tmp;
trans = identMat;
trans.pos = *translation;
trans.flags &= ~IDENTITY;
mult(this, &trans, this);
break;
case COMBINEPOSTCONCAT:
mult(&tmp, this, &trans);
*this = tmp;
trans = identMat;
trans.pos = *translation;
trans.flags &= ~IDENTITY;
mult(this, this, &trans);
break;
}
return this;
@@ -408,23 +427,30 @@ Matrix::translate(const V3d *translation, CombineOp op)
Matrix*
Matrix::scale(const V3d *scale, CombineOp op)
{
Matrix tmp;
Matrix scl = identMat;
scl.right.x = scale->x;
scl.up.y = scale->y;
scl.at.z = scale->z;
scl.flags &= ~IDENTITY;
Matrix scl;
switch(op){
case COMBINEREPLACE:
*this = scl;
*this = identMat;
this->right.x = scale->x;
this->up.y = scale->y;
this->at.z = scale->z;
this->flags &= ~IDENTITY;
break;
case COMBINEPRECONCAT:
mult(&tmp, &scl, this);
*this = tmp;
scl = identMat;
scl.right.x = scale->x;
scl.up.y = scale->y;
scl.at.z = scale->z;
scl.flags &= ~IDENTITY;
mult(this, &scl, this);
break;
case COMBINEPOSTCONCAT:
mult(&tmp, this, &scl);
*this = tmp;
scl = identMat;
scl.right.x = scale->x;
scl.up.y = scale->y;
scl.at.z = scale->z;
scl.flags &= ~IDENTITY;
mult(this, this, &scl);
break;
}
return this;
@@ -433,18 +459,15 @@ Matrix::scale(const V3d *scale, CombineOp op)
Matrix*
Matrix::transform(const Matrix *mat, CombineOp op)
{
Matrix tmp;
switch(op){
case COMBINEREPLACE:
*this = *mat;
break;
case COMBINEPRECONCAT:
mult(&tmp, mat, this);
*this = tmp;
mult(this, mat, this);
break;
case COMBINEPOSTCONCAT:
mult(&tmp, this, mat);
*this = tmp;
mult(this, this, mat);
break;
}
return this;
@@ -456,29 +479,34 @@ Matrix::getRotation(void)
Quat q = { 0.0f, 0.0f, 0.0f, 1.0f };
float32 tr = right.x + up.y + at.z;
float s;
float invs;
if(tr > 0.0f){
s = sqrtf(1.0f + tr) * 2.0f;
invs = Invert<true, false>(s);
q.w = s / 4.0f;
q.x = (up.z - at.y) / s;
q.y = (at.x - right.z) / s;
q.z = (right.y - up.x) / s;
q.x = (up.z - at.y) * invs;
q.y = (at.x - right.z) * invs;
q.z = (right.y - up.x) * invs;
}else if(right.x > up.y && right.x > at.z){
s = sqrtf(1.0f + right.x - up.y - at.z) * 2.0f;
q.w = (up.z - at.y) / s;
invs = Invert<true, false>(s);
q.w = (up.z - at.y) * invs;
q.x = s / 4.0f;
q.y = (up.x + right.y) / s;
q.z = (at.x + right.z) / s;
q.y = (up.x + right.y) * invs;
q.z = (at.x + right.z) * invs;
}else if(up.y > at.z){
s = sqrtf(1.0f + up.y - right.x - at.z) * 2.0f;
q.w = (at.x - right.z) / s;
q.x = (up.x + right.y) / s;
invs = Invert<true, false>(s);
q.w = (at.x - right.z) * invs;
q.x = (up.x + right.y) * invs;
q.y = s / 4.0f;
q.z = (at.y + up.z) / s;
q.z = (at.y + up.z) * invs;
}else{
s = sqrtf(1.0f + at.z - right.x - up.y) * 2.0f;
q.w = (right.y - up.x) / s;
q.x = (at.x + right.z) / s;
q.y = (at.y + up.z) / s;
invs = Invert<true, false>(s);
q.w = (right.y - up.x) * invs;
q.x = (at.x + right.z) * invs;
q.y = (at.y + up.z) * invs;
q.z = s / 4.0f;
}
return q;
@@ -499,35 +527,27 @@ Matrix::lookAt(const V3d &dir, const V3d &up)
* i.e. a vector is first xformed by src1, then by src2
*/
void
Matrix::mult_(Matrix *__restrict__ dst, const Matrix *__restrict__ src1, const Matrix *__restrict__ src2)
Matrix::mult_(Matrix *dst, const Matrix *src1, const Matrix *src2)
{
#if !defined(DC_TEXCONV) && !defined(DC_SIM)
dst->right.x = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
dst->right.y = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
dst->right.z = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
dst->up.x = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
dst->up.y = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
dst->up.z = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
dst->at.x = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
dst->at.y = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
dst->at.z = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
dst->pos.x = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.x, src2->up.x, src2->at.x, src2->pos.x);
dst->pos.y = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.y, src2->up.y, src2->at.y, src2->pos.y);
dst->pos.z = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.z, src2->up.z, src2->at.z, src2->pos.z);
#else
dst->right.x = src1->right.x*src2->right.x + src1->right.y*src2->up.x + src1->right.z*src2->at.x;
dst->right.y = src1->right.x*src2->right.y + src1->right.y*src2->up.y + src1->right.z*src2->at.y;
dst->right.z = src1->right.x*src2->right.z + src1->right.y*src2->up.z + src1->right.z*src2->at.z;
dst->up.x = src1->up.x*src2->right.x + src1->up.y*src2->up.x + src1->up.z*src2->at.x;
dst->up.y = src1->up.x*src2->right.y + src1->up.y*src2->up.y + src1->up.z*src2->at.y;
dst->up.z = src1->up.x*src2->right.z + src1->up.y*src2->up.z + src1->up.z*src2->at.z;
dst->at.x = src1->at.x*src2->right.x + src1->at.y*src2->up.x + src1->at.z*src2->at.x;
dst->at.y = src1->at.x*src2->right.y + src1->at.y*src2->up.y + src1->at.z*src2->at.y;
dst->at.z = src1->at.x*src2->right.z + src1->at.y*src2->up.z + src1->at.z*src2->at.z;
dst->pos.x = src1->pos.x*src2->right.x + src1->pos.y*src2->up.x + src1->pos.z*src2->at.x + src2->pos.x;
dst->pos.y = src1->pos.x*src2->right.y + src1->pos.y*src2->up.y + src1->pos.z*src2->at.y + src2->pos.y;
dst->pos.z = src1->pos.x*src2->right.z + src1->pos.y*src2->up.z + src1->pos.z*src2->at.z + src2->pos.z;
#endif
#ifdef RW_DC
mat_mult(reinterpret_cast<matrix_t *>(dst),
reinterpret_cast<const matrix_t *>(src2),
reinterpret_cast<const matrix_t *>(src1));
#else
dst->right.x = src1->right.x*src2->right.x + src1->right.y*src2->up.x + src1->right.z*src2->at.x;
dst->right.y = src1->right.x*src2->right.y + src1->right.y*src2->up.y + src1->right.z*src2->at.y;
dst->right.z = src1->right.x*src2->right.z + src1->right.y*src2->up.z + src1->right.z*src2->at.z;
dst->up.x = src1->up.x*src2->right.x + src1->up.y*src2->up.x + src1->up.z*src2->at.x;
dst->up.y = src1->up.x*src2->right.y + src1->up.y*src2->up.y + src1->up.z*src2->at.y;
dst->up.z = src1->up.x*src2->right.z + src1->up.y*src2->up.z + src1->up.z*src2->at.z;
dst->at.x = src1->at.x*src2->right.x + src1->at.y*src2->up.x + src1->at.z*src2->at.x;
dst->at.y = src1->at.x*src2->right.y + src1->at.y*src2->up.y + src1->at.z*src2->at.y;
dst->at.z = src1->at.x*src2->right.z + src1->at.y*src2->up.z + src1->at.z*src2->at.z;
dst->pos.x = src1->pos.x*src2->right.x + src1->pos.y*src2->up.x + src1->pos.z*src2->at.x + src2->pos.x;
dst->pos.y = src1->pos.x*src2->right.y + src1->pos.y*src2->up.y + src1->pos.z*src2->at.y + src2->pos.y;
dst->pos.z = src1->pos.x*src2->right.z + src1->pos.y*src2->up.z + src1->pos.z*src2->at.z + src2->pos.z;
#endif
}
void
@@ -536,12 +556,16 @@ Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
dst->right.x = src->right.x;
dst->right.y = src->up.x;
dst->right.z = src->at.x;
dst->flags = TYPEORTHONORMAL;
dst->pad0 = 0;
dst->up.x = src->right.y;
dst->up.y = src->up.y;
dst->up.z = src->at.y;
dst->upw = 0.0f;
dst->at.x = src->right.z;
dst->at.y = src->up.z;
dst->at.z = src->at.z;
dst->atw = 0.0f;
dst->pos.x = -(src->pos.x*src->right.x +
src->pos.y*src->right.y +
src->pos.z*src->right.z);
@@ -551,7 +575,7 @@ Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
dst->pos.z = -(src->pos.x*src->at.x +
src->pos.y*src->at.y +
src->pos.z*src->at.z);
dst->flags = TYPEORTHONORMAL;
dst->posw = 1.0f;
}
Matrix*
@@ -566,7 +590,7 @@ Matrix::invertGeneral(Matrix *dst, const Matrix *src)
det = src->up.x * dst->right.y + src->at.x * dst->right.z + dst->right.x * src->right.x;
invdet = 1.0;
if(det != 0.0f)
invdet = 1.0f/det;
invdet = Invert<false>(det);
dst->right.x *= invdet;
dst->right.y *= invdet;
dst->right.z *= invdet;
@@ -580,6 +604,10 @@ Matrix::invertGeneral(Matrix *dst, const Matrix *src)
dst->pos.y = -(src->pos.x*dst->right.y + src->pos.y*dst->up.y + src->pos.z*dst->at.y);
dst->pos.z = -(src->pos.x*dst->right.z + src->pos.y*dst->up.z + src->pos.z*dst->at.z);
dst->flags &= ~IDENTITY;
dst->pad0 = 0;
dst->upw = 0.0f;
dst->atw = 0.0f;
dst->posw = 1.0f;
return dst;
}
@@ -1035,66 +1063,6 @@ StreamMemory::getLength(void)
return this->length;
}
#if defined(DC_SH4) && 0 // Disabled for now because it is broken
#include <kos/fs.h>
StreamFile*
StreamFile::open(const char *path, const char *mode)
{
assert(this->fd < 0);
this->fd = fs_open(path, mode[0] == 'r' ? O_RDONLY : O_WRONLY);
if(this->fd < 0){
RWERROR((ERR_FILE, path));
return nil;
}
return this;
}
void
StreamFile::close(void)
{
assert(this->fd < 0);
fs_close(this->fd);
this->fd = -1;
}
uint32
StreamFile::write8(const void *data, uint32 length)
{
auto rv = fs_write(this->fd, data, length);
assert(rv == length);
return (uint32)rv;
}
uint32
StreamFile::read8(void *data, uint32 length)
{
auto rv = fs_read(this->fd, data, length);
assert(rv == length);
return (uint32)rv;
}
void
StreamFile::seek(int32 offset, int32 whence)
{
fs_seek(this->fd, offset, whence);
}
uint32
StreamFile::tell(void)
{
return fs_tell(this->fd);
}
bool
StreamFile::eof(void)
{
return fs_total(this->fd) == fs_tell(this->fd);
}
#else
StreamFile*
StreamFile::open(const char *path, const char *mode)
{
@@ -1148,7 +1116,6 @@ StreamFile::eof(void)
{
return ( feof(this->file) != 0 );
}
#endif
uint8 *
StreamFile::mmap(uint32 len)

File diff suppressed because it is too large Load Diff

613
vendor/librw/src/dc/rwdc_common.h vendored Normal file
View File

@@ -0,0 +1,613 @@
#ifndef __RWDC_COMMON_H
#define __RWDC_COMMON_H
#include <tuple>
#include <limits>
#include <cmath>
#include <type_traits>
#include <algorithm>
#include <cstring>
#ifndef RW_DC
# pragma warning(disable: 4244) // int to float
# pragma warning(disable: 4800) // int to bool
# pragma warning(disable: 4838) // narrowing conversion
# pragma warning(disable: 4996) // POSIX names
#else
# if !defined(DC_TEXCONV) && !defined(DC_SIM)
# include <kos.h>
# define DC_SH4
# define VIDEO_MODE_WIDTH static_cast<float>(vid_mode->width)
# define VIDEO_MODE_HEIGHT static_cast<float>(vid_mode->height)
# define memcpy4 memcpy
# else
# ifdef DC_TEXCONV
# define malloc_stats()
# endif
# include <dc/matrix.h>
# include <dc/pvr.h>
# include <kos/dbglog.h>
# define VIDEO_MODE_WIDTH 640.0f
# define VIDEO_MODE_HEIGHT 480.0f
# define memcpy4 memcpy
# define dcache_pref_block(a) __builtin_prefetch(a)
# define F_PI M_PI
# ifndef __always_inline
# define __always_inline inline
# endif
# endif
# ifdef PVR_TXRFMT_STRIDE
# undef PVR_TXRFMT_STRIDE
# define PVR_TXRFMT_STRIDE (1 << 25)
# endif
static_assert(PVR_TXRFMT_STRIDE == (1 << 25),
"PVR_TXRFMT_STRIDE is bugged in your KOS version");
#endif
#define F_PI_2 (F_PI * 0.5f)
#define __hot __attribute__((hot))
#define __cold __attribute__((cold))
#define STRINGIFY(x) #x
#define STR(x) STRINGIFY(x)
#define CONCAT_(x,y) x##y
#define CONCAT(x,y) CONCAT_(x,y)
namespace rw {
class Matrix;
}
namespace dc {
struct quaternion_t {
float x, y, z, w;
};
__always_inline __hot constexpr float Sin(float x) { return sinf(x); }
__always_inline __hot constexpr float Cos(float x) { return cosf(x); }
__always_inline __hot constexpr auto SinCos(float x) { return std::pair { Sin(x), Cos(x) }; }
__always_inline __hot constexpr float Abs(float x) { return fabsf(x); }
__always_inline __hot constexpr float Sqrt(float x) { return sqrtf(x); }
__always_inline __hot constexpr float RecipSqrt(float x, float y) { return x / Sqrt(y); }
__always_inline __hot constexpr float Pow(float x, float y) { return powf(x, y); }
__always_inline __hot constexpr float Floor(float x) { return floorf(x); }
__always_inline __hot constexpr float Ceil(float x) { return ceilf(x); }
__always_inline __hot constexpr float Fmac(auto a, auto b, auto c) { return a * b + c; }
__always_inline __hot constexpr float Lerp(float a, float b, float t) { return Fmac(t, (b - a), a); }
__always_inline __hot constexpr auto Max(auto a, auto b) { return ((a > b)? a : b); }
__always_inline __hot constexpr auto Min(auto a, auto b) { return ((a < b)? a : b); }
template<bool CHECK_ZERO=false>
__always_inline __hot constexpr float RecipSqrt(float x) {
if constexpr(CHECK_ZERO)
if(x == 0.0f)
return 0.0f;
return 1.0f / Sqrt(x);
}
template<typename T>
__always_inline __hot constexpr T Clamp(T v, auto low, auto high) {
return std::clamp(v, static_cast<T>(low), static_cast<T>(high));
}
__always_inline constexpr auto Clamp2(auto v, auto center, auto radius) {
return (v > center) ? Min(v, center + radius) : Max(v, center - radius);
}
template<bool FAST_APPROX=true, bool COPY_SIGN=true>
__always_inline __hot constexpr float Invert(float x) {
float value;
if(!std::is_constant_evaluated() && FAST_APPROX) {
value = RecipSqrt(x * x);
if constexpr(COPY_SIGN)
if(x < 0.0f)
value = -value;
} else value = 1.0f / x;
return value;
}
template<bool FAST_APPROX=true, bool COPY_SIGN=true>
__always_inline __hot constexpr float Div(float x, float y) {
if(FAST_APPROX && !std::is_constant_evaluated())
return x * Invert<true, COPY_SIGN>(y);
else
return x / y;
}
template<bool FAST_APPROX=false, bool COPY_SIGN=true>
__always_inline __hot constexpr auto Norm(auto value, auto min, auto max) {
auto numerator = Clamp(value, min, max) - min;
auto denominator = (max - min);
if(FAST_APPROX && !std::is_constant_evaluated())
return Div<true, COPY_SIGN>(numerator, denominator);
else
return numerator / denominator;
}
template<bool FAST_APPROX=false, bool FAST_DIV=false, bool DIV_COPY_SIGN=false>
__always_inline __hot constexpr float Tan(float x) {
if(!std::is_constant_evaluated() && FAST_APPROX) {
constexpr float pisqby4 = 2.4674011002723397f;
constexpr float adjpisqby4 = 2.471688400562703f;
constexpr float adj1minus8bypisq = 0.189759681063053f;
float xsq = x * x;
return x * Div<FAST_DIV, DIV_COPY_SIGN>(adjpisqby4 - adj1minus8bypisq * xsq,
pisqby4 - xsq);
} else
return tanf(x);
}
template<bool FAST_APPROX=false>
__always_inline __hot constexpr float Atan(float x) {
if(FAST_APPROX && !std::is_constant_evaluated()) {
constexpr float a[3] = { //
0.998418889819911f, -2.9993501171084700E-01f, 0.0869142852883849f};
float xx = x * x;
return ((a[2] * xx + a[1]) * xx + a[0]) * x;
} else return atanf(x);
}
template<bool FAST_APPROX=false>
__hot constexpr float Atan2(float y, float x) {
if(FAST_APPROX && !std::is_constant_evaluated()) {
#if 0
constexpr float halfpi_i754 = M_PI * 0.5f;
constexpr float quarterpi_i754 = M_PI * 0.25f;
// kludge to prevent 0/0 condition
float abs_y = Abs(y) + std::numeric_limits<float>::epsilon();
float absy_plus_absx = abs_y + Abs(x);
float inv_absy_plus_absx = Invert<true, true>(absy_plus_absx);
float angle = halfpi_i754 - copysignf(quarterpi_i754, x);
float r = (x - copysignf(abs_y, x)) * inv_absy_plus_absx;
angle += (0.1963f * r * r - 0.9817f) * r;
return copysignf(angle, y);
#else
// Ensure input is in [-1, +1]
bool swap = fabs(x) < fabs(y);
float atan_input = (swap ? x : y) / (swap ? y : x);
// Approximate atan
float res = Atan<true>(atan_input);
// If swapped, adjust atan output
res = swap ? (atan_input >= 0.0f ? F_PI_2 : -F_PI_2) - res : res;
// Adjust quadrants
if (x >= 0.0f && y >= 0.0f) {} // 1st quadrant
else if (x < 0.0f && y >= 0.0f) { res = F_PI + res; } // 2nd quadrant
else if (x < 0.0f && y < 0.0f) { res = -F_PI + res; } // 3rd quadrant
else if (x >= 0.0f && y < 0.0f) {} // 4th quadrant
// Store result
return res;
#endif
} else return atan2f(y, x);
}
template<bool FAST_APPROX=false>
__always_inline __hot constexpr float Asin(float x) {
if(FAST_APPROX && !std::is_constant_evaluated()) {
Atan(Div<true, false>(x, Sqrt(1.0f-(x*x))));
} else return asinf(x);
}
template<bool FAST_APPROX=false>
__always_inline __hot constexpr float Acos(float x) {
if(FAST_APPROX && !std::is_constant_evaluated()) {
return (-0.69813170079773212f * x * x - 0.87266462599716477f) * x + 1.5707963267948966f;
} else return acosf(x);
}
#ifdef RW_DC
# ifdef DC_SH4
#define mat_trans_nodiv_nomod(x, y, z, x2, y2, z2, w2) do { \
register float __x __asm__("fr12") = (x); \
register float __y __asm__("fr13") = (y); \
register float __z __asm__("fr14") = (z); \
register float __w __asm__("fr15") = 1.0f; \
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
} while(false)
#define mat_trans_nodiv_nomod_zerow(x, y, z, x2, y2, z2, w2) do { \
register float __x __asm__("fr12") = (x); \
register float __y __asm__("fr13") = (y); \
register float __z __asm__("fr14") = (z); \
register float __w __asm__("fr15") = 0.0f; \
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
} while(false)
#define mat_trans_w_nodiv_nomod(x, y, z, w) do { \
register float __x __asm__("fr12") = (x); \
register float __y __asm__("fr13") = (y); \
register float __z __asm__("fr14") = (z); \
register float __w __asm__("fr15") = 1.0f; \
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
w = __w; \
} while(false)
#define mat_trans_vec3(x, y, z) do { \
register float __x __asm__("fr12") = (x); \
register float __y __asm__("fr13") = (y); \
register float __z __asm__("fr14") = (z); \
__asm__ __volatile__( \
"fldi0 fr15\n" \
"ftrv xmtrx, fv12\n" \
: "=f" (__x), "=f" (__y), "=f" (__z) \
: "0" (__x), "1" (__y), "2" (__z) \
: "fr15" ); \
x = __x; y = __y; z = __z; \
} while(false)
#define mat_trans_vec3_nomod(x, y, z, x2, y2, z2) { \
register float __x __asm__("fr12") = (x); \
register float __y __asm__("fr13") = (y); \
register float __z __asm__("fr14") = (z); \
__asm__ __volatile__( \
"fldi0 fr15\n" \
"ftrv xmtrx, fv12\n" \
: "=f" (__x), "=f" (__y), "=f" (__z) \
: "0" (__x), "1" (__y), "2" (__z) \
: "fr15" ); \
x2 = __x; y2 = __y; z2 = __z; \
}
__always_inline __hot void mat_transpose(void) {
asm volatile (
"frchg\n\t" // fmov for singles only works on front bank
// FR0, FR5, FR10, and FR15 are already in place
// swap FR1 and FR4
"flds FR1, FPUL\n\t"
"fmov FR4, FR1\n\t"
"fsts FPUL, FR4\n\t"
// swap FR2 and FR8
"flds FR2, FPUL\n\t"
"fmov FR8, FR2\n\t"
"fsts FPUL, FR8\n\t"
// swap FR3 and FR12
"flds FR3, FPUL\n\t"
"fmov FR12, FR3\n\t"
"fsts FPUL, FR12\n\t"
// swap FR6 and FR9
"flds FR6, FPUL\n\t"
"fmov FR9, FR6\n\t"
"fsts FPUL, FR9\n\t"
// swap FR7 and FR13
"flds FR7, FPUL\n\t"
"fmov FR13, FR7\n\t"
"fsts FPUL, FR13\n\t"
// swap FR11 and FR14
"flds FR11, FPUL\n\t"
"fmov FR14, FR11\n\t"
"fsts FPUL, FR14\n\t"
// restore XMTRX to back bank
"frchg\n"
: // no outputs
: // no inputs
: "fpul" // clobbers
);
}
template<bool FAST_APPROX=true>
__hot constexpr inline void quat_mult(quaternion_t *r, const quaternion_t &q1, const quaternion_t &q2) {
if(FAST_APPROX && !std::is_constant_evaluated()) {
/*
// reorder the coefficients so that q1 stays in constant order {x,y,z,w}
// q2 then needs to be rotated after each inner product
x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
*/
// keep q1 in fv4
register float q1x __asm__ ("fr4") = (q1.x);
register float q1y __asm__ ("fr5") = (q1.y);
register float q1z __asm__ ("fr6") = (q1.z);
register float q1w __asm__ ("fr7") = (q1.w);
// load q2 into fv8, use it to get the shuffled reorder into fv0
register float q2x __asm__ ("fr8") = (q2.x);
register float q2y __asm__ ("fr9") = (q2.y);
register float q2z __asm__ ("fr10") = (q2.z);
register float q2w __asm__ ("fr11") = (q2.w);
// temporary operand / result in fv0
register float t1x __asm__ ("fr0");
register float t1y __asm__ ("fr1");
register float t1z __asm__ ("fr2");
register float t1w __asm__ ("fr3");
// x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
t1x = q2w;
t1y = q2z;
t1z = -q2y;
t1w = q2w;
__asm__ ("\n"
" fipr fv4,fv0\n"
: "+f" (t1w)
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
"f" (t1x), "f" (t1y), "f" (t1z)
);
// x = t1w; try to avoid the stall by not reading the fipr result immediately
// y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
t1x = -q2z;
t1y = q2w;
t1z = q2x;
__atomic_thread_fence(1);
r->x = t1w; // get previous result
t1w = q2y;
__asm__ ("\n"
" fipr fv4,fv0\n"
: "+f" (t1w)
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
"f" (t1x), "f" (t1y), "f" (t1z)
);
//y = t1w;
// z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
t1x = q2y;
t1y = -q2x;
t1z = q2w;
__atomic_thread_fence(1);
r->y = t1w; // get previous result
t1w = q2z;
__asm__ ("\n"
" fipr fv4,fv0\n"
: "+f" (t1w)
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
"f" (t1x), "f" (t1y), "f" (t1z)
);
//z = t1w;
__atomic_thread_fence(1);
// w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
q2x = -q2x;
q2y = -q2y;
q2z = -q2z;
__asm__ ("\n"
" fipr fv4,fv8\n"
: "+f" (q2w)
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
"f" (q2x), "f" (q2y), "f" (q2z)
);
__atomic_thread_fence(1);
r->z = t1w;
__atomic_thread_fence(1);
r->w = q2w;
} else {
r->x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
r->y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
r->z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
r->w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
}
}
__hot inline void mat_load_apply(const matrix_t* matrix1, const matrix_t* matrix2) {
unsigned int prefetch_scratch;
asm volatile (
"mov %[bmtrx], %[pref_scratch]\n\t" // (MT)
"add #32, %[pref_scratch]\n\t" // offset by 32 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
"fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) (FE)
"pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
// back matrix
"fmov.d @%[bmtrx]+, XD0\n\t" // (LS)
"fmov.d @%[bmtrx]+, XD2\n\t"
"fmov.d @%[bmtrx]+, XD4\n\t"
"fmov.d @%[bmtrx]+, XD6\n\t"
"pref @%[fmtrx]\n\t" // prefetch fmtrx now while we wait (LS)
"fmov.d @%[bmtrx]+, XD8\n\t" // bmtrx prefetch should work for here
"fmov.d @%[bmtrx]+, XD10\n\t"
"fmov.d @%[bmtrx]+, XD12\n\t"
"mov %[fmtrx], %[pref_scratch]\n\t" // (MT)
"add #32, %[pref_scratch]\n\t" // store offset by 32 in r0 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
"fmov.d @%[bmtrx], XD14\n\t"
"pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
// front matrix
// interleave loads and matrix multiply 4x4
"fmov.d @%[fmtrx]+, DR0\n\t"
"fmov.d @%[fmtrx]+, DR2\n\t"
"fmov.d @%[fmtrx]+, DR4\n\t" // (LS) want to issue the next one before 'ftrv' for parallel exec
"ftrv XMTRX, FV0\n\t" // (FE)
"fmov.d @%[fmtrx]+, DR6\n\t"
"fmov.d @%[fmtrx]+, DR8\n\t"
"ftrv XMTRX, FV4\n\t"
"fmov.d @%[fmtrx]+, DR10\n\t"
"fmov.d @%[fmtrx]+, DR12\n\t"
"ftrv XMTRX, FV8\n\t"
"fmov.d @%[fmtrx], DR14\n\t" // (LS, but this will stall 'ftrv' for 3 cycles)
"fschg\n\t" // switch back to single moves (and avoid stalling 'ftrv') (FE)
"ftrv XMTRX, FV12\n\t" // (FE)
// Save output in XF regs
"frchg\n"
: [bmtrx] "+&r" ((unsigned int)matrix1), [fmtrx] "+r" ((unsigned int)matrix2), [pref_scratch] "=&r" (prefetch_scratch) // outputs, "+" means r/w, "&" means it's written to before all inputs are consumed
: // no inputs
: "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15" // clobbers (GCC doesn't know about back bank, so writing to it isn't clobbered)
);
}
# else
# ifdef DC_TEXCONV
# define mat_transform(a, b, c, d)
# define mat_apply(a)
# define mat_load(a)
# define mat_store(a)
# define mat_identity(a)
# define pvr_fog_table_color(a,r,g,b)
# define pvr_fog_table_linear(s,e)
# endif
#define mat_trans_vec3(x_, y_, z_) do { \
vector_t tmp = { x_, y_, z_, 0.0f }; \
mat_transform(&tmp, &tmp, 1, 0); \
x_ = tmp.x; y_ = tmp.y; z_ = tmp.z; \
} while(false)
#define mat_trans_vec3_nomod(x_, y_, z_, x2, y2, z2) do { \
vector_t tmp = { x_, y_, z_, 0.0f }; \
mat_transform(&tmp, &tmp, 1, 0); \
x2 = tmp.x; y2 = tmp.y; z2 = tmp.z; \
} while(false)
#define mat_trans_single3_nomod(x_, y_, z_, x2, y2, z2) do { \
vector_t tmp = { x_, y_, z_, 1.0f }; \
mat_transform(&tmp, &tmp, 1, 0); \
z2 = 1.0f / tmp.w; \
x2 = tmp.x * z2; \
y2 = tmp.y * z2; \
} while(false)
#define mat_trans_single3_nodiv_nomod(x_, y_, z_, x2, y2, z2) do { \
vector_t tmp = { x_, y_, z_, 1.0f }; \
mat_transform(&tmp, &tmp, 1, 0); \
z2 = tmp.z; \
x2 = tmp.x; \
y2 = tmp.y; \
} while(false)
#define mat_trans_nodiv_nomod(x_, y_, z_, x2, y2, z2, w2) do { \
vector_t tmp1233123 = { x_, y_, z_, 1.0f }; \
mat_transform(&tmp1233123, &tmp1233123, 1, 0); \
x2 = tmp1233123.x; y2 = tmp1233123.y; z2 = tmp1233123.z; w2 = tmp1233123.w; \
} while(false)
#define mat_trans_nodiv_nomod_zerow(x_, y_, z_, x2, y2, z2, w2) do { \
vector_t tmp1233123 = { x_, y_, z_, 0.0f }; \
mat_transform(&tmp1233123, &tmp1233123, 1, 0); \
x2 = tmp1233123.x; y2 = tmp1233123.y; z2 = tmp1233123.z; w2 = tmp1233123.w; \
} while(false)
#define mat_trans_w_nodiv_nomod(x_, y_, z_, w_) do { \
vector_t tmp1233123 = { x_, y_, z_, 1.0f }; \
mat_transform(&tmp1233123, &tmp1233123, 1, 0); \
w_ = tmp1233123.w; \
} while(false)
inline void mat_transpose(void) {
matrix_t tmp;
for(unsigned i = 0; i < 4; ++i)
for(unsigned j = 0; j < 4; ++j)
tmp[j][i] = XMTRX[i][j];
mat_load(&tmp);
}
template<bool FAST_APPROX=true>
__hot inline void quat_mult(quaternion_t *r, const quaternion_t &q1, const quaternion_t &q2) {
r->x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
r->y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
r->z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
r->w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
}
__hot inline void mat_load_apply(const matrix_t* matrix1, const matrix_t* matrix2) {
mat_load(matrix1);
mat_apply(matrix2);
}
#endif
__hot inline void mat_mult(matrix_t *out, const matrix_t* matrix1, const matrix_t* matrix2) {
mat_load_apply(matrix1, matrix2);
mat_store(out);
}
__always_inline __hot void mat_copy(matrix_t *dst, const matrix_t *src) {
mat_load(src);
mat_store(dst);
}
#if defined(DC_SH4)
inline uint8_t* OCRAM = (uint8_t*)0x7c001000;
#else
alignas(32) inline uint8_t OCRAM[32 * 256];
#endif
inline void ocram_enter() {
#if defined(DC_SH4)
auto mask = irq_disable();
dcache_purge_all();
volatile uint32_t * CCN_CCR = (uint32_t *)0xFF00001C;
*CCN_CCR |= (1 << 5); // enable OCR (ORA)
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
irq_restore(mask);
#endif
}
inline void ocram_leave() {
#if defined(DC_SH4)
auto mask = irq_disable();
dcache_inval_range(0x92000000, 8192);
dcache_purge_all();
volatile uint32_t * CCN_CCR = (uint32_t *)0xFF00001C;
*CCN_CCR &= ~( 1 << 5); // disable OCR (ORA)
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
irq_restore(mask);
#endif
}
#endif
}
#endif /* RWDC_COMMON_H */

View File

@@ -64,13 +64,14 @@ malloc_managed(size_t sz, uint32 hint)
MemoryBlock *mem;
if(sz == 0) return nil;
bool align64 = !!(hint & ID_MATRIX);
origPtr = malloc(sz + sizeof(MemoryBlock) + 15);
if(origPtr == nil)
return nil;
totalMemoryAllocated += sz;
data = (uint8*)origPtr;
data += sizeof(MemoryBlock);
data = (uint8*)ALIGN16((uintptr)data);
data = (uint8*) ALIGN16((uintptr)data);
mem = (MemoryBlock*)(data-sizeof(MemoryBlock));
mem->sz = sz;

View File

@@ -10,6 +10,8 @@
#define M_PI 3.14159265358979323846
#endif
#include "rwdc_common.h"
// TODO: clean up the opengl defines
// and figure out what we even want here...
#ifdef RW_GL3
@@ -305,21 +307,45 @@ inline Quat scale(const Quat &q, float32 r) { return makeQuat(q.w*r, q.x*r, q.y*
inline float32 length(const Quat &q) { return sqrtf(q.w*q.w + q.x*q.x + q.y*q.y + q.z*q.z); }
inline Quat normalize(const Quat &q) { return scale(q, 1.0f/length(q)); }
inline Quat conj(const Quat &q) { return makeQuat(q.w, -q.x, -q.y, -q.z); }
Quat mult(const Quat &q, const Quat &p);
inline Quat mult(const Quat &q, const Quat &p) {
Quat r;
dc::quat_mult(reinterpret_cast<dc::quaternion_t *>(&r),
reinterpret_cast<const dc::quaternion_t &>(q),
reinterpret_cast<const dc::quaternion_t &>(p));
return r;
}
inline V3d rotate(const V3d &v, const Quat &q) { return mult(mult(q, makeQuat(0.0f, v)), conj(q)).vec(); }
Quat lerp(const Quat &q, const Quat &p, float32 r);
Quat slerp(const Quat &q, const Quat &p, float32 a);
struct __attribute__((aligned(8))) RawMatrix
struct alignas(8) RawMatrixBase
{
V3d right;
float32 rightw;
float32 rightw = 0.0f;
V3d up;
float32 upw;
float32 upw = 0.0f;
V3d at;
float32 atw;
float32 atw = 0.0f;
V3d pos;
float32 posw;
float32 posw = 1.0f;
};
struct RawMatrix : public RawMatrixBase
{
RawMatrix() = default;
RawMatrix(RawMatrixBase &&aggregate):
RawMatrixBase{aggregate}
{}
RawMatrix(const RawMatrix& rhs) {
*this = rhs;
}
RawMatrix &operator=(const RawMatrix& rhs) {
dc::mat_copy(reinterpret_cast<matrix_t*>(this), reinterpret_cast<const matrix_t*>(&rhs));
return *this;
}
// NB: this is dst = src2*src1, i.e. src1 is applied first, then src2
static void mult(RawMatrix *dst, RawMatrix *src1, RawMatrix *src2);
@@ -327,7 +353,7 @@ struct __attribute__((aligned(8))) RawMatrix
static void setIdentity(RawMatrix *dst);
};
struct Matrix
struct alignas(8) MatrixBase
{
enum Type {
TYPENORMAL = 1,
@@ -336,7 +362,8 @@ struct Matrix
TYPEMASK = 3
};
enum Flags {
IDENTITY = 0x20000
IDENTITY = 0x4,
IDENTITY_OLD = 0x20000
};
struct Tolerance {
float32 normal;
@@ -344,14 +371,47 @@ struct Matrix
float32 identity;
};
V3d right;
uint32 flags;
V3d up;
uint32 pad1;
V3d at;
uint32 pad2;
V3d pos;
uint32 pad3;
V3d right = { 1.0f, 0.0f, 0.0f };
union {
struct {
uint32 flags: 3 = TYPEORTHONORMAL|IDENTITY;
uint32_t pad0: 29 = 0;
};
float rightw;
};
V3d up = { 0.0f, 1.0f, 0.0f };
union {
uint32 pad1;
float upw = 0.0f;
};
V3d at = { 0.0f, 0.0f, 1.0f };
union {
uint32 pad2;
float atw = 0.0f;
};
V3d pos = { 0.0f, 0.0f, 0.0f };
union {
uint32 pad3;
float posw = 1.0f;
};
};
struct Matrix: public MatrixBase
{
Matrix() = default;
Matrix(MatrixBase &&aggregate):
MatrixBase{aggregate}
{}
Matrix(const Matrix& rhs) {
*this = rhs;
}
Matrix &operator=(const Matrix& rhs) {
dc::mat_copy(reinterpret_cast<matrix_t*>(this), reinterpret_cast<const matrix_t*>(&rhs));
return *this;
}
static Matrix *create(void);
void destroy(void);

View File

@@ -224,6 +224,7 @@ writeSkin(Stream *stream, int32 len, void *object, int32 offset, int32)
for(int32 i = 0; i < skin->numBones; i++){
if(oldFormat)
stream->writeU32(0xdeaddead);
stream->write32(&skin->inverseMatrices[i*16], 64);
}