Add Metal backend (#1287)

This commit is contained in:
SamoZ256 2025-12-06 17:14:25 +01:00 committed by GitHub
parent 5520613dc3
commit 26e40a4bce
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
108 changed files with 14986 additions and 453 deletions

View file

@ -8,6 +8,14 @@
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Common/MemPtr.h"
#include "HW/Latte/ISA/LatteReg.h"
#if ENABLE_METAL
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#endif
// Defined in LatteTextureLegacy.cpp
Latte::E_GX2SURFFMT LatteTexture_ReconstructGX2Format(const Latte::LATTE_SQ_TEX_RESOURCE_WORD1_N& texUnitWord1, const Latte::LATTE_SQ_TEX_RESOURCE_WORD4_N& texUnitWord4);
/*
* Return index of used color attachment based on shader pixel export index (0-7)
@ -289,15 +297,15 @@ void LatteDecompiler_analyzeTEXClause(LatteDecompilerShaderContext* shaderContex
LatteDecompilerShader* shader = shaderContext->shader;
for(auto& texInstruction : cfInstruction->instructionsTEX)
{
if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_L ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LB ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LZ ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C ||
if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_L ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LB ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LZ ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ ||
texInstruction.opcode == GPU7_TEX_INST_FETCH4 ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_G ||
texInstruction.opcode == GPU7_TEX_INST_FETCH4 ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_G ||
texInstruction.opcode == GPU7_TEX_INST_LD )
{
if (texInstruction.textureFetch.textureIndex < 0 || texInstruction.textureFetch.textureIndex >= LATTE_NUM_MAX_TEX_UNITS)
@ -315,7 +323,7 @@ void LatteDecompiler_analyzeTEXClause(LatteDecompilerShaderContext* shaderContex
shader->textureUnitSamplerAssignment[texInstruction.textureFetch.textureIndex] = texInstruction.textureFetch.samplerIndex;
if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ)
shader->textureUsesDepthCompare[texInstruction.textureFetch.textureIndex] = true;
bool useTexelCoords = false;
if (texInstruction.opcode == GPU7_TEX_INST_SAMPLE && (texInstruction.textureFetch.unnormalized[0] && texInstruction.textureFetch.unnormalized[1] && texInstruction.textureFetch.unnormalized[2] && texInstruction.textureFetch.unnormalized[3]))
useTexelCoords = true;
@ -384,7 +392,7 @@ void LatteDecompiler_analyzeExport(LatteDecompilerShaderContext* shaderContext,
LatteDecompilerShader* shader = shaderContext->shader;
if( shader->shaderType == LatteConst::ShaderType::Pixel )
{
if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase < 8 )
if (cfInstruction->exportType == 0 && cfInstruction->exportArrayBase < 8)
{
// remember color outputs that are written
for(uint32 i=0; i<(cfInstruction->exportBurstCount+1); i++)
@ -393,9 +401,11 @@ void LatteDecompiler_analyzeExport(LatteDecompilerShaderContext* shaderContext,
shader->pixelColorOutputMask |= (1<<colorOutputIndex);
}
}
else if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase == 61 )
else if (cfInstruction->exportType == 0 && cfInstruction->exportArrayBase == 61)
{
// writes pixel depth
// Only check for depth buffer mask on Metal, as its not in the PS hash on other backends
if (g_renderer->GetType() != RendererAPI::Metal || LatteMRT::GetActiveDepthBufferMask(*shaderContext->contextRegistersNew))
shader->depthMask = true;
}
else
debugBreakpoint();
@ -421,7 +431,7 @@ void LatteDecompiler_analyzeExport(LatteDecompilerShaderContext* shaderContext,
void LatteDecompiler_analyzeSubroutine(LatteDecompilerShaderContext* shaderContext, uint32 cfAddr)
{
// analyze CF and clauses up to RET statement
// todo - find cfInstruction index from cfAddr
cemu_assert_debug(false);
@ -500,6 +510,18 @@ namespace LatteDecompiler
}
}
void _initTextureBindingPointsMTL(LatteDecompilerShaderContext* decompilerContext)
{
// for Vulkan we use consecutive indices
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
{
if (!decompilerContext->output->textureUnitMask[i] || decompilerContext->shader->textureRenderTargetIndex[i] != 255)
continue;
decompilerContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] = decompilerContext->currentTextureBindingPointMTL;
decompilerContext->currentTextureBindingPointMTL++;
}
}
void _initHasUniformVarBlock(LatteDecompilerShaderContext* decompilerContext)
{
decompilerContext->hasUniformVarBlock = false;
@ -507,9 +529,9 @@ namespace LatteDecompiler
decompilerContext->hasUniformVarBlock = true;
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
decompilerContext->hasUniformVarBlock = true;
bool hasAnyViewportScaleDisabled =
!decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() ||
bool hasAnyViewportScaleDisabled =
!decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() ||
!decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Y_SCALE_ENA() ||
!decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Z_SCALE_ENA();
// we currently only support all on/off. Individual component scaling is not supported
@ -537,6 +559,15 @@ namespace LatteDecompiler
{
decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance and uf_streamoutBufferBase*
}
#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
bool usesGeometryShader = UseGeometryShader(*decompilerContext->contextRegistersNew, decompilerContext->options->usesGeometryShader);
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && usesGeometryShader)
decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance
}
#endif
}
void _initUniformBindingPoints(LatteDecompilerShaderContext* decompilerContext)
@ -554,14 +585,13 @@ namespace LatteDecompiler
}
}
// assign binding point to uniform var block
decompilerContext->output->resourceMappingGL.uniformVarsBufferBindingPoint = -1; // OpenGL currently doesnt use a uniform block
if (decompilerContext->hasUniformVarBlock)
{
decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = decompilerContext->currentBindingPointVK;
decompilerContext->currentBindingPointVK++;
decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint = decompilerContext->currentBufferBindingPointMTL;
decompilerContext->currentBufferBindingPointMTL++;
}
else
decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = -1;
// assign binding points to uniform buffers
if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
{
@ -580,6 +610,8 @@ namespace LatteDecompiler
decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] = decompilerContext->currentBindingPointVK;
decompilerContext->currentBindingPointVK++;
decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] = decompilerContext->currentBufferBindingPointMTL;
decompilerContext->currentBufferBindingPointMTL++;
}
// for OpenGL we use the relative buffer index
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
@ -601,6 +633,8 @@ namespace LatteDecompiler
{
decompilerContext->output->resourceMappingVK.tfStorageBindingPoint = decompilerContext->currentBindingPointVK;
decompilerContext->currentBindingPointVK++;
decompilerContext->output->resourceMappingMTL.tfStorageBindingPoint = decompilerContext->currentBufferBindingPointMTL;
decompilerContext->currentBufferBindingPointMTL++;
}
}
@ -617,6 +651,7 @@ namespace LatteDecompiler
{
decompilerContext->output->resourceMappingGL.attributeMapping[i] = bindingIndex;
decompilerContext->output->resourceMappingVK.attributeMapping[i] = bindingIndex;
decompilerContext->output->resourceMappingMTL.attributeMapping[i] = bindingIndex;
bindingIndex++;
}
}
@ -805,7 +840,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
for(sint32 i=0; i<LATTE_NUM_MAX_TEX_UNITS; i++)
{
if (!shaderContext->output->textureUnitMask[i])
if (!shaderContext->output->textureUnitMask[i])
{
// texture unit not used
shader->textureUnitDim[i] = (Latte::E_DIM)0xFF;
@ -827,6 +862,78 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
shader->textureUnitList[shader->textureUnitListCount] = i;
shader->textureUnitListCount++;
}
shader->textureRenderTargetIndex[i] = 255;
}
// check if textures are used as render targets
if (shader->shaderType == LatteConst::ShaderType::Pixel)
{
struct {
sint32 index;
MPTR physAddr;
Latte::E_GX2SURFFMT format;
Latte::E_HWTILEMODE tileMode;
} colorBuffers[LATTE_NUM_COLOR_TARGET]{};
uint8 colorBufferMask = LatteMRT::GetActiveColorBufferMask(shader, *shaderContext->contextRegistersNew);
sint32 colorBufferCount = 0;
for (sint32 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto& colorBuffer = colorBuffers[colorBufferCount];
if (((colorBufferMask) & (1 << i)) == 0)
continue; // color buffer not enabled
uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + i);
uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this
uint32 regColorInfo = colorBufferRegBase[mmCB_COLOR0_INFO - mmCB_COLOR0_BASE];
MPTR colorBufferPhysMem = regColorBufferBase;
Latte::E_HWTILEMODE colorBufferTileMode = (Latte::E_HWTILEMODE)((regColorInfo >> 8) & 0xF);
Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(i, *shaderContext->contextRegistersNew);
colorBuffer = {i, colorBufferPhysMem, colorBufferFormat, colorBufferTileMode};
colorBufferCount++;
}
for (sint32 i = 0; i < shader->textureUnitListCount; i++)
{
sint32 textureIndex = shader->textureUnitList[i];
const auto& texRegister = texRegs[textureIndex];
// get physical address of texture data
MPTR physAddr = (texRegister.word2.get_BASE_ADDRESS() << 8);
if (physAddr == MPTR_NULL)
continue; // invalid data
auto tileMode = texRegister.word0.get_TILE_MODE();
// Check for dimension
auto dim = shader->textureUnitDim[textureIndex];
// TODO: 2D arrays could be supported as well
if (dim != Latte::E_DIM::DIM_2D)
continue;
// Check for mip level
auto lastMip = texRegister.word5.get_LAST_LEVEL();
// TODO: multiple mip levels could be supported as well
if (lastMip != 0)
continue;
Latte::E_GX2SURFFMT format = LatteTexture_ReconstructGX2Format(texRegister.word1, texRegister.word4);
// Check if the texture is used as render target
for (sint32 j = 0; j < colorBufferCount; j++)
{
const auto& colorBuffer = colorBuffers[j];
if (physAddr == colorBuffer.physAddr && format == colorBuffer.format && tileMode == colorBuffer.tileMode)
{
shader->textureRenderTargetIndex[textureIndex] = colorBuffer.index;
break;
}
}
}
}
// for geometry shaders check the copy shader for stream writes
if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false)
@ -1002,6 +1109,10 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
shaderContext->output->resourceMappingVK.setIndex = 2;
LatteDecompiler::_initTextureBindingPointsGL(shaderContext);
LatteDecompiler::_initTextureBindingPointsVK(shaderContext);
LatteDecompiler::_initTextureBindingPointsMTL(shaderContext);
LatteDecompiler::_initUniformBindingPoints(shaderContext);
LatteDecompiler::_initAttributeBindingPoints(shaderContext);
shaderContext->output->resourceMappingMTL.verticesPerInstanceBinding = shaderContext->currentBufferBindingPointMTL++;
shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++;
shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++;
}