Skip to content

Commit

Permalink
Merge pull request #682 from Exzap/shader-improvements-2
Browse files Browse the repository at this point in the history
Utilize VK_KHR_SHADER_FLOAT_CONTROLS if available
  • Loading branch information
Exzap committed Mar 11, 2023
2 parents 43c9a4e + a2c5183 commit 3acdd47
Show file tree
Hide file tree
Showing 16 changed files with 211 additions and 228 deletions.
52 changes: 35 additions & 17 deletions src/Cafe/HW/Latte/Core/LatteShader.cpp
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
#include "Cafe/GraphicPack/GraphicPack2.h"
#include "util/helpers/StringParser.h"
#include "config/ActiveSettings.h"
#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h"
#include "util/Zir/Core/ZpIRDebug.h"
#include "util/containers/flat_hash_map.hpp"
#include <cinttypes>

// experimental new decompiler (WIP)
#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h"
#include "util/Zir/Core/ZpIRDebug.h"
#include "Cafe/HW/Latte/Transcompiler/LatteTC.h"
#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h"

struct _ShaderHashCache
{
uint64 prevHash1;
Expand Down Expand Up @@ -544,7 +549,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
// hash stride for streamout buffers
for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
{
if(!vertexShader->streamoutBufferWriteMask2[i])
if(!vertexShader->streamoutBufferWriteMask[i])
continue;
uint32 bufferStride = contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + i * 4];
auxHash = std::rotl<uint64>(auxHash, 7);
Expand Down Expand Up @@ -612,7 +617,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
// copy texture info
shader->textureUnitMask2 = decompilerOutput.textureUnitMask;
// copy streamout info
shader->streamoutBufferWriteMask2 = decompilerOutput.streamoutBufferWriteMask;
shader->streamoutBufferWriteMask = decompilerOutput.streamoutBufferWriteMask;
shader->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any();
// copy uniform offsets
// for OpenGL these are retrieved in _prepareSeparableUniforms()
Expand Down Expand Up @@ -672,10 +677,18 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
return shader;
}

#include "Cafe/HW/Latte/Transcompiler/LatteTC.h"
#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h"
// Fills in the decompiler options that depend on the active renderer.
//
// options               - receives the settings; usesGeometryShader and
//                         spirvInstrinsics.hasRoundingModeRTEFloat32 are always written,
//                         useTFViaSSBO is only written when the Vulkan renderer is active
// shaderType            - not consulted by this function at present
// geometryShaderEnabled - copied verbatim into options.usesGeometryShader
void LatteShader_GetDecompilerOptions(LatteDecompilerOptions& options, LatteConst::ShaderType shaderType, bool geometryShaderEnabled)
{
	// renderer-independent settings
	options.usesGeometryShader = geometryShaderEnabled;
	// start with the RTE rounding mode intrinsic disabled, it is only turned on for Vulkan below
	options.spirvInstrinsics.hasRoundingModeRTEFloat32 = false;

	// everything past this point is queried from the Vulkan renderer
	if (g_renderer->GetType() != RendererAPI::Vulkan)
		return;

	options.useTFViaSSBO = VulkanRenderer::GetInstance()->UseTFViaSSBO();
	options.spirvInstrinsics.hasRoundingModeRTEFloat32 = VulkanRenderer::GetInstance()->HasSPRIVRoundingModeRTE32();
}

LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
LatteDecompilerShader* LatteShader_CompileSeparableVertexShader2(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
{
/* Analyze shader to gather general information about inputs/outputs */
Latte::ShaderDescription shaderDescription;
Expand Down Expand Up @@ -725,14 +738,15 @@ LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash,
// compile new vertex shader (relies partially on current state)
LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
{
// new decompiler
//LatteShader_compileSeparableVertexShader(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader);
// new decompiler test
//LatteShader_CompileSeparableVertexShader2(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader);

// legacy decompiler
LatteDecompilerOptions options;
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader);

LatteDecompilerOutput_t decompilerOutput{};
LatteFetchShader* fetchShaderList[1];
fetchShaderList[0] = fetchShader;
LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShaderList, 1, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShader, options, &decompilerOutput);
LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
vsAuxHash = vertexShader->auxHash;
if (vertexShader->hasError == false)
Expand All @@ -759,10 +773,11 @@ LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash,

LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHash, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize)
{
LatteDecompilerOptions options;
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true);

LatteDecompilerOutput_t decompilerOutput{};
LatteFetchShader* fetchShaderList[1];
fetchShaderList[0] = _activeFetchShader;
LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, &decompilerOutput);
LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, _activeVertexShader->ringParameterCount, options, &decompilerOutput);
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
if (geometryShader->hasError == false)
{
Expand All @@ -787,8 +802,11 @@ LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHas

LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash, uint64& psAuxHash, uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader)
{
LatteDecompilerOptions options;
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader);

LatteDecompilerOutput_t decompilerOutput{};
LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, options, &decompilerOutput);
LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
psAuxHash = pixelShader->auxHash;
LatteShader_DumpShader(_shaderBaseHash_ps, psAuxHash, pixelShader);
Expand Down
1 change: 1 addition & 0 deletions src/Cafe/HW/Latte/Core/LatteShader.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ extern uint64 _shaderBaseHash_vs;
extern uint64 _shaderBaseHash_gs;
extern uint64 _shaderBaseHash_ps;

void LatteShader_GetDecompilerOptions(struct LatteDecompilerOptions& options, LatteConst::ShaderType shaderType, bool geometryShaderEnabled);
LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompilerOutput_t& decompilerOutput, uint64 baseHash, bool calculateAuxHash, uint64 optionalAuxHash, uint32* contextRegister);

void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync);
Expand Down
22 changes: 13 additions & 9 deletions src/Cafe/HW/Latte/Core/LatteShaderCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,22 +641,22 @@ bool LatteShaderCache_readSeparableVertexShader(MemStreamReader& streamReader, u
return false;
if (streamReader.hasError() || !streamReader.isEndOfStream())
return false;
// update PS inputs (influence VS shader outputs)
// update PS inputs (affects VS shader outputs)
LatteShader_UpdatePSInputs(lcr->GetRawView());
// get fetch shader
LatteFetchShader::CacheHash fsHash = LatteFetchShader::CalculateCacheHash((uint32*)fetchShaderData.data(), fetchShaderData.size());
LatteFetchShader* fetchShader = LatteShaderRecompiler_createFetchShader(fsHash, lcr->GetRawView(), (uint32*)fetchShaderData.data(), fetchShaderData.size());
// determine decompiler options
LatteDecompilerOptions options;
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader);
// decompile vertex shader
LatteDecompilerOutput_t decompilerOutput{};
LatteFetchShader* fetchShaderList[1];
fetchShaderList[0] = fetchShader;
LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShaderList, 1, lcr->GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShader, options, &decompilerOutput);
LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
// compile
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, vertexShader);
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_VERTEX, vertexShaderData.data(), vertexShaderData.size());
LatteShaderCache_loadOrCompileSeparableShader(vertexShader, shaderBaseHash, shaderAuxHash);
catchOpenGLError();
LatteSHRC_RegisterShader(vertexShader, shaderBaseHash, shaderAuxHash);
return true;
}
Expand Down Expand Up @@ -688,15 +688,17 @@ bool LatteShaderCache_readSeparableGeometryShader(MemStreamReader& streamReader,
return false;
// update PS inputs
LatteShader_UpdatePSInputs(lcr->GetRawView());
// determine decompiler options
LatteDecompilerOptions options;
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true);
// decompile geometry shader
LatteDecompilerOutput_t decompilerOutput{};
LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), lcr->GetSpecialStateValues(), vsRingParameterCount, &decompilerOutput);
LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), vsRingParameterCount, options, &decompilerOutput);
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
// compile
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, geometryShader);
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_GEOMETRY, geometryShaderData.data(), geometryShaderData.size());
LatteShaderCache_loadOrCompileSeparableShader(geometryShader, shaderBaseHash, shaderAuxHash);
catchOpenGLError();
LatteSHRC_RegisterShader(geometryShader, shaderBaseHash, shaderAuxHash);
return true;
}
Expand Down Expand Up @@ -724,15 +726,17 @@ bool LatteShaderCache_readSeparablePixelShader(MemStreamReader& streamReader, ui
return false;
// update PS inputs
LatteShader_UpdatePSInputs(lcr->GetRawView());
// determine decompiler options
LatteDecompilerOptions options;
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader);
// decompile pixel shader
LatteDecompilerOutput_t decompilerOutput{};
LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), lcr->GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), options, &decompilerOutput);
LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
// compile
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, pixelShader);
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_PIXEL, pixelShaderData.data(), pixelShaderData.size());
LatteShaderCache_loadOrCompileSeparableShader(pixelShader, shaderBaseHash, shaderAuxHash);
catchOpenGLError();
LatteSHRC_RegisterShader(pixelShader, shaderBaseHash, shaderAuxHash);
return true;
}
Expand Down
6 changes: 3 additions & 3 deletions src/Cafe/HW/Latte/Core/LatteStreamoutGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,16 +101,16 @@ void LatteStreamout_PrepareDrawcall(uint32 count, uint32 instanceCount)
if (geometryShader)
{
#ifdef CEMU_DEBUG_ASSERT
cemu_assert_debug(vertexShader->streamoutBufferWriteMask2.any() == false);
cemu_assert_debug(vertexShader->streamoutBufferWriteMask.any() == false);
#endif
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
if (geometryShader->streamoutBufferWriteMask2[i])
if (geometryShader->streamoutBufferWriteMask[i])
streamoutWriteMask |= (1 << i);
}
else
{
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
if (vertexShader->streamoutBufferWriteMask2[i])
if (vertexShader->streamoutBufferWriteMask[i])
streamoutWriteMask |= (1 << i);
}
activeStreamoutOperation.streamoutWriteMask = streamoutWriteMask;
Expand Down

0 comments on commit 3acdd47

Please sign in to comment.