Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assertion : NHL 2002 unimplemented writable const register #1189

Open
Triticum0 opened this issue Jul 17, 2022 · 2 comments
Open

Assertion : NHL 2002 unimplemented writable const register #1189

Triticum0 opened this issue Jul 17, 2022 · 2 comments
Labels
bug Something isn't working

Comments

@Triticum0
Copy link

Triticum0 commented Jul 17, 2022

Title

NHL 2002 - https://xemu.app/titles/45410005

Bug Description

This game triggers xemu's unimplemented writable const register path, which causes a crash with an assertion when trying to go in-game.

Expected Behavior

This game should make it in-game.

xemu Version

Version: 0.7.60
Branch: master
Commit: 8d274e5
Date: Wed Jul 13 23:19:36 UTC 2022

System Information

Field Value
OS Windows 10
CPU AMD Ryzen 5 2600 Six-Core Processor
Graphics Device NVIDIA GeForce RTX 3060 Ti/PCIe/SSE2
Graphics Driver 4.0.0 NVIDIA 512.95

Additional Context

This game gives a shader compilation failure after the assertion is hit.

Failing Vertex Shader from NHL 2002
#version 400

uniform vec2 clipRange;
uniform vec2 surfaceSize;

uniform vec4 c[192];

uniform vec4 fogColor;
uniform float fogParam[2];

#define fogPlane c[0x39]
#define texMat0 mat4(c[0x44], c[0x44+1], c[0x44+2], c[0x44+3])
#define texMat1 mat4(c[0x4c], c[0x4c+1], c[0x4c+2], c[0x4c+3])
#define texMat2 mat4(c[0x54], c[0x54+1], c[0x54+2], c[0x54+3])
#define texMat3 mat4(c[0x5c], c[0x5c+1], c[0x5c+2], c[0x5c+3])

vec4 oPos = vec4(0.0,0.0,0.0,1.0);
vec4 oD0 = vec4(0.0,0.0,0.0,1.0);
vec4 oD1 = vec4(0.0,0.0,0.0,1.0);
vec4 oB0 = vec4(0.0,0.0,0.0,1.0);
vec4 oB1 = vec4(0.0,0.0,0.0,1.0);
vec4 oPts = vec4(0.0,0.0,0.0,1.0);
vec4 oFog = vec4(1.0,0.0,0.0,1.0);
vec4 oT0 = vec4(0.0,0.0,0.0,1.0);
vec4 oT1 = vec4(0.0,0.0,0.0,1.0);
vec4 oT2 = vec4(0.0,0.0,0.0,1.0);
vec4 oT3 = vec4(0.0,0.0,0.0,1.0);

vec4 decompress_11_11_10(int cmp) {
float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;
float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;
float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;
return vec4(x, y, z, 1);
}
struct VertexData {
float inv_w;
vec4 D0;
vec4 D1;
vec4 B0;
vec4 B1;
float Fog;
vec4 T0;
vec4 T1;
vec4 T2;
vec4 T3;
};
noperspective out VertexData g_vtx;
#define vtx g_vtx

layout(location = 0) in vec4 v0;
layout(location = 1) in vec4 v1;
layout(location = 2) in vec4 v2;
layout(location = 3) in vec4 v3;
layout(location = 4) in vec4 v4;
layout(location = 5) in vec4 v5;
layout(location = 6) in vec4 v6;
layout(location = 7) in vec4 v7;
layout(location = 8) in vec4 v8;
layout(location = 9) in vec4 v9;
layout(location = 10) in vec4 v10;
layout(location = 11) in vec4 v11;
layout(location = 12) in vec4 v12;
layout(location = 13) in vec4 v13;
layout(location = 14) in vec4 v14;
layout(location = 15) in vec4 v15;

int A0 = 0;

vec4 R0 = vec4(0.0,0.0,0.0,0.0);
vec4 R1 = vec4(0.0,0.0,0.0,0.0);
vec4 R2 = vec4(0.0,0.0,0.0,0.0);
vec4 R3 = vec4(0.0,0.0,0.0,0.0);
vec4 R4 = vec4(0.0,0.0,0.0,0.0);
vec4 R5 = vec4(0.0,0.0,0.0,0.0);
vec4 R6 = vec4(0.0,0.0,0.0,0.0);
vec4 R7 = vec4(0.0,0.0,0.0,0.0);
vec4 R8 = vec4(0.0,0.0,0.0,0.0);
vec4 R9 = vec4(0.0,0.0,0.0,0.0);
vec4 R10 = vec4(0.0,0.0,0.0,0.0);
vec4 R11 = vec4(0.0,0.0,0.0,0.0);
#define R12 oPos

vec4 _temp_vec;
int _temp_addr;

/* Converts the input to vec4, pads with last component */
vec4 _in(float v) { return vec4(v); }
vec4 _in(vec2 v) { return v.xyyy; }
vec4 _in(vec3 v) { return v.xyzz; }
vec4 _in(vec4 v) { return v.xyzw; }

#define INFINITY (1.0 / 0.0)

#define MOV(dest, mask, src) dest.mask = _MOV(_in(src)).mask
vec4 _MOV(vec4 src)
{
return src;
}

#define MUL(dest, mask, src0, src1) dest.mask = _MUL(_in(src0), _in(src1)).mask
vec4 _MUL(vec4 src0, vec4 src1)
{
return src0 * src1;
}

#define ADD(dest, mask, src0, src1) dest.mask = _ADD(_in(src0), _in(src1)).mask
vec4 _ADD(vec4 src0, vec4 src1)
{
return src0 + src1;
}

#define MAD(dest, mask, src0, src1, src2) dest.mask = _MAD(_in(src0), _in(src1), _in(src2)).mask
vec4 _MAD(vec4 src0, vec4 src1, vec4 src2)
{
return src0 * src1 + src2;
}

#define DP3(dest, mask, src0, src1) dest.mask = _DP3(_in(src0), _in(src1)).mask
vec4 _DP3(vec4 src0, vec4 src1)
{
return vec4(dot(src0.xyz, src1.xyz));
}

#define DPH(dest, mask, src0, src1) dest.mask = _DPH(_in(src0), _in(src1)).mask
vec4 _DPH(vec4 src0, vec4 src1)
{
return vec4(dot(vec4(src0.xyz, 1.0), src1));
}

#define DP4(dest, mask, src0, src1) dest.mask = _DP4(_in(src0), _in(src1)).mask
vec4 _DP4(vec4 src0, vec4 src1)
{
return vec4(dot(src0, src1));
}

#define DST(dest, mask, src0, src1) dest.mask = _DST(_in(src0), _in(src1)).mask
vec4 _DST(vec4 src0, vec4 src1)
{
return vec4(1.0,
src0.y * src1.y,
src0.z,
src1.w);
}

#define MIN(dest, mask, src0, src1) dest.mask = _MIN(_in(src0), _in(src1)).mask
vec4 _MIN(vec4 src0, vec4 src1)
{
return min(src0, src1);
}

#define MAX(dest, mask, src0, src1) dest.mask = _MAX(_in(src0), _in(src1)).mask
vec4 _MAX(vec4 src0, vec4 src1)
{
return max(src0, src1);
}

#define SLT(dest, mask, src0, src1) dest.mask = _SLT(_in(src0), _in(src1)).mask
vec4 _SLT(vec4 src0, vec4 src1)
{
return vec4(lessThan(src0, src1));
}

#define ARL(dest, src) dest = _ARL(_in(src).x)
int _ARL(float src)
{
/* Xbox GPU does specify rounding, OpenGL doesn't; so we need a bias.

  • Example: We probably want to floor 16.99.. to 17, not 16.
  • Source of error (why we get 16.99.. instead of 17.0) is typically
  • vertex-attributes being normalized from a byte value to float:
  • 17 / 255 = 0.06666.. so is this 0.06667 (ceil) or 0.06666 (floor)?
  • Which value we get depends on the host GPU.
  • If we multiply these rounded values by 255 later, we get:
  • 17.00 (ARL result = 17) or 16.99 (ARL result = 16).
  • We assume the intend was to get 17, so we add our bias to fix it. */
    return int(floor(src + 0.001));
    }

#define SGE(dest, mask, src0, src1) dest.mask = _SGE(_in(src0), _in(src1)).mask
vec4 _SGE(vec4 src0, vec4 src1)
{
return vec4(greaterThanEqual(src0, src1));
}

#define RCP(dest, mask, src) dest.mask = _RCP(_in(src).x).mask
vec4 _RCP(float src)
{
return vec4(1.0 / src);
}

#define RCC(dest, mask, src) dest.mask = _RCC(_in(src).x).mask
vec4 _RCC(float src)
{
float t = 1.0 / src;
if (t > 0.0) {
t = clamp(t, 5.42101e-020, 1.884467e+019);
} else {
t = clamp(t, -1.884467e+019, -5.42101e-020);
}
return vec4(t);
}

#define RSQ(dest, mask, src) dest.mask = _RSQ(_in(src).x).mask
vec4 _RSQ(float src)
{
if (src == 0.0) { return vec4(INFINITY); }
if (isinf(src)) { return vec4(0.0); }
return vec4(inversesqrt(abs(src)));
}

#define EXP(dest, mask, src) dest.mask = _EXP(_in(src).x).mask
vec4 _EXP(float src)
{
vec4 result;
result.x = exp2(floor(src));
result.y = src - floor(src);
result.z = exp2(src);
result.w = 1.0;
return result;
}

#define LOG(dest, mask, src) dest.mask = _LOG(_in(src).x).mask
vec4 _LOG(float src)
{
float tmp = abs(src);
if (tmp == 0.0) { return vec4(-INFINITY, 1.0f, -INFINITY, 1.0f); }
vec4 result;
result.x = floor(log2(tmp));
result.y = tmp / exp2(floor(log2(tmp)));
result.z = log2(tmp);
result.w = 1.0;
return result;
}

#define LIT(dest, mask, src) dest.mask = _LIT(_in(src)).mask
vec4 _LIT(vec4 src)
{
vec4 s = src;
float epsilon = 1.0 / 256.0;
s.w = clamp(s.w, -(128.0 - epsilon), 128.0 - epsilon);
s.x = max(s.x, 0.0);
s.y = max(s.y, 0.0);
vec4 t = vec4(1.0, 0.0, 0.0, 1.0);
t.y = s.x;
t.z = (s.x > 0.0) ? exp2(s.w * log2(s.y)) : 0.0;
return t;
}
void main() {
/* Slot 0: 0x00000000 0x002C601B 0x0C36106C 0x2F200FF8 */
MOV(R2,xyzw, c[99]);

/* Slot 1: 0x00000000 0x002C201B 0x0C36106C 0x2F300FF8 */
MOV(R3,xyzw, c[97]);

/* Slot 2: 0x00000000 0x002C401B 0x0C36106C 0x2F400FF8 */
MOV(R4,xyzw, c[98]);

/* Slot 3: 0x00000000 0x006DC01B 0x0C3613FC 0x2F500FF8 */
ADD(R5,xyzw, c[110], v0.w);

/* Slot 4: 0x00000000 0x006DA01B 0x0C3613FC 0x2E600FF8 */
ADD(R6,xyz, c[109], v0.w);

/* Slot 5: 0x00000000 0x006D801B 0x0C3613FC 0x2C700FF8 */
ADD(R7,xy, c[108], v0.w);

/* Slot 6: 0x00000000 0x014E2000 0x0800186C 0x21800FF8 */
MAX(R8,w, v0.x, c[113].x);

/* Slot 7: 0x00000000 0x016D6000 0x0DFE106C 0x21900FF8 */
SLT(R9,w, c[107].x, v0.w);

/* Slot 8: 0x00000000 0x006D6000 0x0C3617FC 0x20708358 */
ADD(c107,x, c[107].x, -v0.w);

/* Slot 9: 0x00000000 0x008FA0FF 0x55FE1AA8 0x38A00FF8 */
MAD(R10,x, R5.w, c[125].w, c[125].z);

/* Slot 10: 0x00000000 0x008F4000 0x5436186C 0x9FB00FF8 */
MAD(R11,xyzw, R5.x, c[122], R2);

/* Slot 11: 0x00000000 0x008EE000 0x6436186C 0xDF000FF8 */
MAD(R0,xyzw, R6.x, c[119], R3);

/* Slot 12: 0x00000000 0x008E8000 0x7436186D 0x1F200FF8 */
MAD(R2,xyzw, R7.x, c[116], R4);

/* Slot 13: 0x00000000 0x008F6055 0x5436186E 0xDF300FF8 */
MAD(R3,xyzw, R5.y, c[123], R11);

/* Slot 14: 0x00000000 0x008F0055 0x6436186C 0x1F400FF8 */
MAD(R4,xyzw, R6.y, c[120], R0);

/* Slot 15: 0x00000000 0x042E60AA 0x0C36106E 0x94840FF8 */
MOV(_temp_vec,y, c[115].z);
RCP(R1,y, R10.x);
R8.y = _temp_vec.y;

/* Slot 16: 0x00000000 0x008D60FF 0x94AA1800 0x30708358 */
MAD(c107,x, R9.w, c[107].y, c[107].x);

/* Slot 17: 0x00000000 0x018D6000 0x0DFE106C 0x28800FF8 */
SGE(R8,x, c[107].x, v0.w);

/* Slot 18: 0x00000000 0x008EA055 0x7436186C 0x9F900FF8 */
MAD(R9,xyzw, R7.y, c[117], R2);

/* Slot 19: 0x00000000 0x016D6000 0x0DFE106C 0x22800FF8 */
SLT(R8,z, c[107].x, v0.w);

/* Slot 20: 0x00000000 0x008F20AA 0x6436186D 0x1FB00FF8 */
MAD(R11,xyzw, R6.z, c[121], R4);

/* Slot 21: 0x00000000 0x00400055 0x15FF086C 0x24A00FF8 */
MUL(R10,y, R1.y, R8.w);

/* Slot 22: 0x00000000 0x008FA0AA 0x54AA1800 0x32000FF8 */
MAD(R0,z, R5.z, c[125].y, c[125].x);

/* Slot 23: 0x00000000 0x006E401B 0x0C36146E 0xDF200FF8 */
ADD(R2,xyzw, c[114], -R11);

/* Slot 24: 0x00000000 0x008E2100 0x85FE1800 0x30708388 */
MAD(c113,x, -R8.x, c[113].w, c[113].x);

/* Slot 25: 0x00000000 0x008E00AA 0x84001BFC 0x24400FF8 */
MAD(R4,y, R8.z, c[112].x, v0.w);

/* Slot 26: 0x00000000 0x008E20AA 0x84AB0800 0x30708388 */
MAD(c113,x, R8.z, R8.y, c[113].x);

/* Slot 27: 0x00000000 0x0A4DE0AA 0x84361956 0x9F540FF8 */
MUL(_temp_vec,xyzw, R8.z, c[111]);
EXP(R1,y, R10.y);
R5.xyzw = _temp_vec.xyzw;

/* Slot 28: 0x00000000 0x00400055 0x1401486C 0x22400FF8 */
MUL(R4,z, R1.y, R10.x);

/* Slot 29: 0x00000000 0x008DC055 0x4436186D 0x5F600FF8 */
MAD(R6,xyzw, R4.y, c[110], R5);

/* Slot 30: 0x00000000 0x008D8055 0x4436186D 0x5C700FF8 */
MAD(R7,xy, R4.y, c[108], R5);

/* Slot 31: 0x00000000 0x008EC0AA 0x4436186E 0x5FA00FF8 */
MAD(R10,xyzw, R4.z, c[118], R9);

/* Slot 32: 0x00000000 0x008DA055 0x4436186D 0x5E900FF8 */
MAD(R9,xyz, R4.y, c[109], R5);

/* Slot 33: 0x00000000 0x008F80AA 0x4436186C 0xDFB00FF8 */
MAD(R11,xyzw, R4.z, c[124], R3);

/* Slot 34: 0x00000000 0x0A00001B 0x08361155 0x90340FF8 */
EXP(R3,y, R6.y);

/* Slot 35: 0x00000000 0x0A00001B 0x08361155 0xD0040FF8 */
EXP(R0,y, R7.y);

/* Slot 36: 0x00000000 0x0A4D601B 0xB436186D 0x91B04584 */
MUL(_temp_vec,w, R11, c[107]);
EXP(c176,y, R6.x);
R11.w = _temp_vec.w;

/* Slot 37: 0x00000000 0x0A360055 0x0C3612A9 0x9830458C */
MOV(_temp_vec,x, c[176].y);
EXP(c177,y, R6.z);
R3.x = _temp_vec.x;

/* Slot 38: 0x00000000 0x0A362055 0x0C3613FD 0x92304594 */
MOV(_temp_vec,z, c[177].y);
EXP(c178,y, R6.w);
R3.z = _temp_vec.z;

/* Slot 39: 0x00000000 0x0A364055 0x0C36106D 0xD130459C */
MOV(_temp_vec,w, c[178].y);
EXP(c179,y, R7.x);
R3.w = _temp_vec.w;

/* Slot 40: 0x00000000 0x0A366055 0x0C36106E 0x580045A4 */
MOV(_temp_vec,x, c[179].y);
EXP(c180,y, R9.x);
R0.x = _temp_vec.x;

/* Slot 41: 0x00000000 0x0A00001B 0x08361156 0x50540FF8 */
EXP(R5,y, R9.y);

/* Slot 42: 0x00000000 0x0020001B 0xB436106C 0x2070F818 */
MOV(oD0,xyzw, R11);

/* Slot 43: 0x00000000 0x032E20AA 0x4554186C 0x1F60C364 */
MIN(_temp_vec,xyzw, R4.z, c[113].z);
MOV(c108,xy, R0);
R6.xyzw = _temp_vec.xyzw;

/* Slot 44: 0x00000000 0x0A368055 0x0C3612AA 0x585045AC */
MOV(_temp_vec,x, c[180].y);
EXP(c181,y, R9.z);
R5.x = _temp_vec.x;

/* Slot 45: 0x00000000 0x0236A055 0x0C36106C 0xD250F374 */
MOV(_temp_vec,z, c[181].y);
MOV(c110,xyzw, R3);
R5.z = _temp_vec.z;

/* Slot 46: 0x00000000 0x008E601B 0x64AA1800 0x3F300FF8 */
MAD(R3,xyzw, R6, c[115].y, c[115].x);

/* Slot 47: 0x00000000 0x008E40AA 0x4436186E 0x9F700FF8 */
MAD(R7,xyzw, R4.z, c[114], R10);

/* Slot 48: 0x00000000 0x008D6100 0x85541BFC 0x30701358 */
MAD(c107,w, -R8.x, c[107].z, c[107].w);

/* Slot 49: 0x00000000 0x008D6000 0x85FE1AAA 0x10701358 */
MAD(c107,w, R8.x, c[107].w, R8.z);

/* Slot 50: 0x00000000 0x0240001B 0x2436C86D 0x5F80E36C */
MUL(_temp_vec,xyzw, R2, R6);
MOV(c109,xyz, R5);
R8.xyzw = _temp_vec.xyzw;

/* Slot 51: 0x00000000 0x0080001B 0x8436686D 0xDF600FF8 */
MAD(R6,xyzw, R8, R3, R7);

/* Slot 52: 0x00000000 0x00EC801B 0x6436186C 0x20708800 */
DP4(oPos,x, R6, c[100]);

/* Slot 53: 0x00000000 0x00ECA01B 0x6436186C 0x20704800 */
DP4(oPos,y, R6, c[101]);

/* Slot 54: 0x00000000 0x00ECC01B 0x6436186C 0x20702800 */
DP4(oPos,z, R6, c[102]);

/* Slot 55: 0x00000000 0x00ECE01B 0x6436186C 0x20701800 */
DP4(oPos,w, R6, c[103]);

/* Slot 56: 0x00000000 0x06ED001B 0x64361BFF 0x11780FF8 */
DP4(_temp_vec,w, R6, c[104]);
RCC(R1,x, R12.w);
R7.w = _temp_vec.w;

/* Slot 57: 0x00000000 0x0847401B 0xC4361BFD 0xD011E800 */
MUL(oPos,xyz, R12, c[58]);
RSQ(R1,w, R7.w);

/* Slot 58: 0x00000000 0x004000FF 0x1554086C 0x21800FF8 */
MUL(R8,w, R1.w, R0.z);

/* Slot 59: 0x00000000 0x0087601B 0xC400286C 0x3070E800 */
MAD(oPos,xyz, R12, R1.x, c[59]);

/* Slot 60: 0x00000000 0x014000FF 0x85FE106C 0x20708831 */
MAX(oPts,x, R8.w, v0.w);

if (oPos.w == 0.0 || isinf(oPos.w)) {
vtx.inv_w = 1.0;
} else {
vtx.inv_w = 1.0 / oPos.w;
}
oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;
oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) / surfaceSize.y;
if (clipRange.y != clipRange.x) {
oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y - clipRange.x)) - 1;
}
if (oPos.w < 0.0) {
oPos.xyz *= oPos.w;
} else {
oPos.w = 1.0;
}
float fogDistance = oFog.x;
if (isinf(fogDistance)) {
fogDistance = 0.0;
}
float fogFactor = fogParam[0] + fogDistance * fogParam[1];
fogFactor -= 1.0;
oFog.xyzw = vec4(fogFactor);

vtx.D0 = clamp(oD0, 0.0, 1.0) * vtx.inv_w;
vtx.D1 = clamp(oD1, 0.0, 1.0) * vtx.inv_w;
vtx.B0 = clamp(oB0, 0.0, 1.0) * vtx.inv_w;
vtx.B1 = clamp(oB1, 0.0, 1.0) * vtx.inv_w;
vtx.Fog = oFog.x * vtx.inv_w;
vtx.T0 = oT0 * vtx.inv_w;
vtx.T1 = oT1 * vtx.inv_w;
vtx.T2 = oT2 * vtx.inv_w;
vtx.T3 = oT3 * vtx.inv_w;
gl_Position = oPos;
gl_PointSize = oPts.x;

}

nv2a: vertex shader compilation failed: 0(274) : error C1503: undefined variable "c107"
0(300) : error C1503: undefined variable "c107"
0(324) : error C1503: undefined variable "c113"
0(330) : error C1503: undefined variable "c113"
0(363) : error C1503: undefined variable "c176"
0(368) : error C1503: undefined variable "c177"
0(373) : error C1503: undefined variable "c178"
0(378) : error C1503: undefined variable "c179"
0(383) : error C1503: undefined variable "c180"
0(394) : error C1503: undefined variable "c108"
0(399) : error C1503: undefined variable "c181"
0(404) : error C1503: undefined variable "c110"
0(414) : error C1503: undefined variable "c107"
0(417) : error C1503: undefined variable "c107"
0(421) : error C1503: undefined variable "c109"

@Triticum0 Triticum0 added the bug Something isn't working label Jul 17, 2022
@Triticum0 Triticum0 changed the title NHL-2002 unimplemented writable const register Assertion:NHL 2002 unimplemented writable const register Jul 17, 2022
@Triticum0 Triticum0 changed the title Assertion:NHL 2002 unimplemented writable const register Assertion : NHL 2002 unimplemented writable const register Jul 17, 2022
@abaire
Copy link
Contributor

abaire commented Jul 17, 2022

In #1080 I added a CPU-based renderer to handle writable const/context registers. In theory we could implement a path for NHL 2002 that falls back to running the shader on the CPU and uploads pre-transformed results to a special vertex shader path.

I'm still pondering, however, whether we could better handle writable context registers in the vertex shader itself, using transform feedback to capture any modified results. Roughly, we'd decode the vsh and identify any writes to context registers, then generate `out vec4 wr_c<register_num>` variables for each one, initializing them to the appropriate `c[register_num]` uniform and flagging them via `glTransformFeedbackVaryings`. After the draw we'd fetch the values out of the transform feedback buffer and update the CPU-side state with the value(s) associated with the last relevant vertex in order to match HW behavior.

@abaire
Copy link
Contributor

abaire commented Jul 22, 2022

According to Discord this is triggered when doing a CPU vs CPU game: https://discord.com/channels/680221390359887933/680221390359888154/999822976667099196

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

2 participants