Cemu/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp
Crementif 280de39471 vk: Always embed shader source code when RenderDoc (etc.) is attached
The only downside is that attaching RenderDoc to Cemu will cause it to ignore the spirv cache. This causes slightly longer load times when RenderDoc is attached and you had a prior shader cache that contained the non-debug shader.
2025-11-23 17:23:37 +01:00

484 lines
16 KiB
C++

#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
#include "config/ActiveSettings.h"
#include "config/CemuConfig.h"
#include "util/helpers/ConcurrentQueue.h"
#include "Cemu/FileCache/FileCache.h"
#include <glslang/Public/ShaderLang.h>
#include <glslang/SPIRV/GlslangToSpv.h>
#include "util/helpers/helpers.h"
// Set to true by ShaderCacheLoading_begin() and back to false by ShaderCacheLoading_end().
// While true, CompileInternal() may take SPIR-V from s_spirvCache and does not bump g_compiled_shaders_total.
bool s_isLoadingShadersVk{ false };
// SPIR-V file cache. Opened in ShaderCacheLoading_begin(), deleted in ShaderCacheLoading_Close().
// Stays nullptr if FileCache::Open() fails, so every user has to null-check it.
class FileCache* s_spirvCache{nullptr};
// Shader counters defined in another translation unit.
// _total is incremented for game shaders compiled outside of cache loading, _async for shaders compiled by the worker threads.
extern std::atomic_int g_compiled_shaders_total;
extern std::atomic_int g_compiled_shaders_async;
// Builds, at compile time, the resource limit table that is handed to glslang
// when preprocessing and parsing shaders. All fields not listed stay zero-initialized.
consteval TBuiltInResource GetDefaultBuiltInResource()
{
	TBuiltInResource res = {};

	// legacy fixed-function / general limits
	res.maxLights = 32;
	res.maxClipPlanes = 6;
	res.maxTextureUnits = 32;
	res.maxTextureCoords = 32;
	res.maxVertexAttribs = 64;
	res.maxVertexUniformComponents = 4096;
	res.maxVaryingFloats = 64;
	res.maxVertexTextureImageUnits = 32;
	res.maxCombinedTextureImageUnits = 80;
	res.maxTextureImageUnits = 32;
	res.maxFragmentUniformComponents = 4096;
	res.maxDrawBuffers = 32;
	res.maxVertexUniformVectors = 128;
	res.maxVaryingVectors = 8;
	res.maxFragmentUniformVectors = 16;
	res.maxVertexOutputVectors = 16;
	res.maxFragmentInputVectors = 15;
	res.minProgramTexelOffset = -8;
	res.maxProgramTexelOffset = 7;
	res.maxClipDistances = 8;

	// compute
	res.maxComputeWorkGroupCountX = 65535;
	res.maxComputeWorkGroupCountY = 65535;
	res.maxComputeWorkGroupCountZ = 65535;
	res.maxComputeWorkGroupSizeX = 1024;
	res.maxComputeWorkGroupSizeY = 1024;
	res.maxComputeWorkGroupSizeZ = 64;
	res.maxComputeUniformComponents = 1024;
	res.maxComputeTextureImageUnits = 16;
	res.maxComputeImageUniforms = 8;
	res.maxComputeAtomicCounters = 8;
	res.maxComputeAtomicCounterBuffers = 1;

	// inter-stage components
	res.maxVaryingComponents = 60;
	res.maxVertexOutputComponents = 64;
	res.maxGeometryInputComponents = 64;
	res.maxGeometryOutputComponents = 128;
	res.maxFragmentInputComponents = 128;

	// images
	res.maxImageUnits = 8;
	res.maxCombinedImageUnitsAndFragmentOutputs = 8;
	res.maxCombinedShaderOutputResources = 8;
	res.maxImageSamples = 0;
	res.maxVertexImageUniforms = 0;
	res.maxTessControlImageUniforms = 0;
	res.maxTessEvaluationImageUniforms = 0;
	res.maxGeometryImageUniforms = 0;
	res.maxFragmentImageUniforms = 8;
	res.maxCombinedImageUniforms = 8;

	// geometry stage
	res.maxGeometryTextureImageUnits = 16;
	res.maxGeometryOutputVertices = 256;
	res.maxGeometryTotalOutputComponents = 1024;
	res.maxGeometryUniformComponents = 1024;
	res.maxGeometryVaryingComponents = 64;

	// tessellation stages
	res.maxTessControlInputComponents = 128;
	res.maxTessControlOutputComponents = 128;
	res.maxTessControlTextureImageUnits = 16;
	res.maxTessControlUniformComponents = 1024;
	res.maxTessControlTotalOutputComponents = 4096;
	res.maxTessEvaluationInputComponents = 128;
	res.maxTessEvaluationOutputComponents = 128;
	res.maxTessEvaluationTextureImageUnits = 16;
	res.maxTessEvaluationUniformComponents = 1024;
	res.maxTessPatchComponents = 120;
	res.maxPatchVertices = 32;
	res.maxTessGenLevel = 64;
	res.maxViewports = 16;

	// atomic counters
	res.maxVertexAtomicCounters = 0;
	res.maxTessControlAtomicCounters = 0;
	res.maxTessEvaluationAtomicCounters = 0;
	res.maxGeometryAtomicCounters = 0;
	res.maxFragmentAtomicCounters = 8;
	res.maxCombinedAtomicCounters = 8;
	res.maxAtomicCounterBindings = 1;
	res.maxVertexAtomicCounterBuffers = 0;
	res.maxTessControlAtomicCounterBuffers = 0;
	res.maxTessEvaluationAtomicCounterBuffers = 0;
	res.maxGeometryAtomicCounterBuffers = 0;
	res.maxFragmentAtomicCounterBuffers = 1;
	res.maxCombinedAtomicCounterBuffers = 1;
	res.maxAtomicCounterBufferSize = 16384;

	// transform feedback, culling, multisampling
	res.maxTransformFeedbackBuffers = 4;
	res.maxTransformFeedbackInterleavedComponents = 64;
	res.maxCullDistances = 8;
	res.maxCombinedClipAndCullDistances = 8;
	res.maxSamples = 4;

	// NV mesh / task shaders
	res.maxMeshOutputVerticesNV = 256;
	res.maxMeshOutputPrimitivesNV = 512;
	res.maxMeshWorkGroupSizeX_NV = 32;
	res.maxMeshWorkGroupSizeY_NV = 1;
	res.maxMeshWorkGroupSizeZ_NV = 1;
	res.maxTaskWorkGroupSizeX_NV = 32;
	res.maxTaskWorkGroupSizeY_NV = 1;
	res.maxTaskWorkGroupSizeZ_NV = 1;
	res.maxMeshViewCountNV = 4;

	// language feature switches: reset the sub-struct, then enable every listed feature
	res.limits = {};
	auto& limits = res.limits;
	limits.nonInductiveForLoops = true;
	limits.whileLoops = true;
	limits.doWhileLoops = true;
	limits.generalUniformIndexing = true;
	limits.generalAttributeMatrixVectorIndexing = true;
	limits.generalVaryingIndexing = true;
	limits.generalSamplerIndexing = true;
	limits.generalVariableIndexing = true;
	limits.generalConstantMatrixVectorIndexing = true;

	return res;
}
class _ShaderVkThreadPool
{
public:
void StartThreads()
{
if (m_threadsActive.exchange(true))
return;
// create thread pool
const uint32 threadCount = 2;
for (uint32 i = 0; i < threadCount; ++i)
s_threads.emplace_back(&_ShaderVkThreadPool::CompilerThreadFunc, this);
}
void StopThreads()
{
if (!m_threadsActive.exchange(false))
return;
for (uint32 i = 0; i < s_threads.size(); ++i)
s_compilationQueueCount.increment();
for (auto& it : s_threads)
it.join();
s_threads.clear();
}
~_ShaderVkThreadPool()
{
StopThreads();
}
void CompilerThreadFunc()
{
SetThreadName("vkShaderComp");
while (m_threadsActive.load(std::memory_order::relaxed))
{
s_compilationQueueCount.decrementWithWait();
s_compilationQueueMutex.lock();
if (s_compilationQueue.empty())
{
// queue empty again, shaders compiled synchronously via PreponeCompilation()
s_compilationQueueMutex.unlock();
continue;
}
RendererShaderVk* job = s_compilationQueue.front();
s_compilationQueue.pop_front();
// set compilation state
cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderVk::COMPILATION_STATE::QUEUED);
job->m_compilationState.setValue(RendererShaderVk::COMPILATION_STATE::COMPILING);
s_compilationQueueMutex.unlock();
// compile
job->CompileInternal(false);
++g_compiled_shaders_async;
// mark as compiled
cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderVk::COMPILATION_STATE::COMPILING);
job->m_compilationState.setValue(RendererShaderVk::COMPILATION_STATE::DONE);
}
}
bool HasThreadsRunning() const { return m_threadsActive; }
public:
std::vector<std::thread> s_threads;
std::deque<RendererShaderVk*> s_compilationQueue;
CounterSemaphore s_compilationQueueCount;
std::mutex s_compilationQueueMutex;
private:
std::atomic<bool> m_threadsActive;
}ShaderVkThreadPool;
// Constructs the shader and immediately queues it for asynchronous compilation on the shader thread pool.
// glslCode is copied into m_glslCode.
// RendererShaderVk::Init() must have been called beforehand so that worker threads exist to pick the job up.
RendererShaderVk::RendererShaderVk(ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& glslCode)
	: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_glslCode(glslCode)
{
	// start async compilation
	{
		// RAII lock: if push_back throws (allocation failure) the queue mutex is still released
		std::lock_guard<std::mutex> queueLock(ShaderVkThreadPool.s_compilationQueueMutex);
		m_compilationState.setValue(COMPILATION_STATE::QUEUED);
		ShaderVkThreadPool.s_compilationQueue.push_back(this);
		ShaderVkThreadPool.s_compilationQueueCount.increment();
	}
	cemu_assert_debug(ShaderVkThreadPool.HasThreadsRunning()); // make sure .StartThreads() was called
}
// Releases all pipeline infos attached to this shader, then destroys the Vulkan shader module.
RendererShaderVk::~RendererShaderVk()
{
	// NOTE(review): this loop only terminates if deleting an entry removes it from list_pipelineInfo
	// (presumably done by the entry's destructor) - confirm against the pipeline info type.
	while (!list_pipelineInfo.empty())
		delete list_pipelineInfo[0];

	vkDestroyShaderModule(VulkanRenderer::GetInstance()->GetLogicalDevice(), m_shader_module, nullptr);
}
// Starts the shader compiler worker threads. Safe to call repeatedly; StartThreads() ignores calls while the pool is active.
void RendererShaderVk::Init()
{
ShaderVkThreadPool.StartThreads();
}
// Stops and joins the shader compiler worker threads. Does nothing if the pool is not active.
void RendererShaderVk::Shutdown()
{
ShaderVkThreadPool.StopThreads();
}
// Stub: performs no lookup. Raises cemu_assert_suspicious() and always returns 0; 'name' is ignored.
// NOTE(review): presumably uniform locations are not used by the Vulkan backend - confirm against callers.
sint32 RendererShaderVk::GetUniformLocation(const char* name)
{
cemu_assert_suspicious();
return 0;
}
// Stub: only raises cemu_assert_suspicious(); 'location' and 'value' are ignored.
void RendererShaderVk::SetUniform1i(sint32 location, sint32 value)
{
cemu_assert_suspicious();
}
// Stub: only raises cemu_assert_suspicious(); 'location' and 'value' are ignored.
void RendererShaderVk::SetUniform1f(sint32 location, float value)
{
cemu_assert_suspicious();
}
// Stub: only raises cemu_assert_suspicious(); 'location', 'data' and 'count' are ignored.
void RendererShaderVk::SetUniform2fv(sint32 location, void* data, sint32 count)
{
cemu_assert_suspicious();
}
// Stub: only raises cemu_assert_suspicious(); 'location', 'data' and 'count' are ignored.
void RendererShaderVk::SetUniform4iv(sint32 location, void* data, sint32 count)
{
cemu_assert_suspicious();
}
// Creates m_shader_module from the given SPIR-V words.
// Throws std::runtime_error if vkCreateShaderModule fails.
// When debug utils are enabled, the module is additionally tagged with a name derived from the shader hashes.
void RendererShaderVk::CreateVkShaderModule(std::span<uint32> spirvBuffer)
{
	auto* renderer = static_cast<VulkanRenderer*>(g_renderer.get());
	VkDevice device = renderer->GetLogicalDevice();

	VkShaderModuleCreateInfo moduleInfo{};
	moduleInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
	moduleInfo.codeSize = spirvBuffer.size_bytes();
	moduleInfo.pCode = spirvBuffer.data();

	VkResult result = vkCreateShaderModule(device, &moduleInfo, nullptr, &m_shader_module);
	if (result != VK_SUCCESS)
	{
		cemuLog_log(LogType::Force, "Vulkan: Shader error");
		throw std::runtime_error(fmt::format("Failed to create shader module: {}", result));
	}

	// the debug name is optional; skip it unless the extension entry point is loaded and enabled
	if (!renderer->IsDebugUtilsEnabled() || !vkSetDebugUtilsObjectNameEXT)
		return;

	// keep the string alive until after the call, pObjectName only borrows it
	const std::string debugName = fmt::format("shader_{:016x}_{:016x}", m_baseHash, m_auxHash);
	VkDebugUtilsObjectNameInfoEXT nameInfo{};
	nameInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT;
	nameInfo.objectType = VK_OBJECT_TYPE_SHADER_MODULE;
	nameInfo.objectHandle = (uint64_t)m_shader_module;
	nameInfo.pObjectName = debugName.c_str();
	vkSetDebugUtilsObjectNameEXT(device, &nameInfo);
}
// Drops the stored GLSL source once it is no longer needed.
// CompileInternal() calls this on every exit path, successful or not.
void RendererShaderVk::FinishCompilation()
{
m_glslCode.clear();
// ask the string to give its buffer back as well, clear() alone keeps the capacity
m_glslCode.shrink_to_fit();
}
void RendererShaderVk::CompileInternal(bool isRenderThread)
{
bool compileWithDebugInfo = ((VulkanRenderer*)g_renderer.get())->IsDebugUtilsEnabled() && vkSetDebugUtilsObjectNameEXT;
// try to retrieve SPIR-V module from cache
if (s_isLoadingShadersVk && (m_isGameShader && !m_isGfxPackShader) && s_spirvCache && !compileWithDebugInfo)
{
cemu_assert_debug(m_baseHash != 0);
uint64 h1, h2;
GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2);
std::vector<uint8> cacheFileData;
if (s_spirvCache->GetFile({ h1, h2 }, cacheFileData))
{
// generate shader from cached SPIR-V buffer
CreateVkShaderModule(std::span<uint32>((uint32*)cacheFileData.data(), cacheFileData.size() / sizeof(uint32)));
FinishCompilation();
return;
}
}
EShLanguage state;
switch (GetType())
{
case ShaderType::kVertex:
state = EShLangVertex;
break;
case ShaderType::kFragment:
state = EShLangFragment;
break;
case ShaderType::kGeometry:
state = EShLangGeometry;
break;
default:
cemu_assert_debug(false);
}
glslang::TShader Shader(state);
const char* cstr = m_glslCode.c_str();
Shader.setStrings(&cstr, 1);
Shader.setEnvInput(glslang::EShSourceGlsl, state, glslang::EShClientVulkan, 100);
Shader.setEnvClient(glslang::EShClientVulkan, glslang::EShTargetClientVersion::EShTargetVulkan_1_1);
Shader.setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
std::string PreprocessedGLSL;
glslang::TShader::ForbidIncluder Includer;
TBuiltInResource Resources = GetDefaultBuiltInResource();
EShMessages messagesPreprocess = (EShMessages)(EShMsgSpvRules | EShMsgVulkanRules);
if (!Shader.preprocess(&Resources, 450, ENoProfile, false, false, messagesPreprocess, &PreprocessedGLSL, Includer))
{
cemuLog_log(LogType::Force, fmt::format("GLSL Preprocessing Failed For {:016x}_{:016x}: \"{}\"", m_baseHash, m_auxHash, Shader.getInfoLog()));
FinishCompilation();
return;
}
const char* PreprocessedCStr = PreprocessedGLSL.c_str();
Shader.setStrings(&PreprocessedCStr, 1);
EShMessages messagesParseLink = (EShMessages)(EShMsgSpvRules | EShMsgVulkanRules);
if (!Shader.parse(&Resources, 100, false, messagesParseLink))
{
cemuLog_log(LogType::Force, fmt::format("GLSL parsing failed for {:016x}_{:016x}: \"{}\"", m_baseHash, m_auxHash, Shader.getInfoLog()));
cemuLog_logDebug(LogType::Force, "GLSL source:\n{}", m_glslCode);
cemu_assert_debug(false);
FinishCompilation();
return;
}
glslang::TProgram Program;
Program.addShader(&Shader);
if (!Program.link(messagesParseLink))
{
cemuLog_log(LogType::Force, fmt::format("GLSL linking failed for {:016x}_{:016x}: \"{}\"", m_baseHash, m_auxHash, Program.getInfoLog()));
cemu_assert_debug(false);
FinishCompilation();
return;
}
if (!Program.mapIO())
{
cemuLog_log(LogType::Force, fmt::format("GLSL linking failed for {:016x}_{:016x}: \"{}\"", m_baseHash, m_auxHash, Program.getInfoLog()));
FinishCompilation();
return;
}
// temp storage for SPIR-V after translation
std::vector<uint32> spirvBuffer;
spv::SpvBuildLogger logger;
glslang::SpvOptions spvOptions;
spvOptions.disableOptimizer = false;
spvOptions.validate = false;
spvOptions.optimizeSize = true;
if (compileWithDebugInfo)
{
spvOptions.generateDebugInfo = true;
spvOptions.emitNonSemanticShaderDebugInfo = true;
spvOptions.emitNonSemanticShaderDebugSource = true;
Shader.addSourceText(m_glslCode.c_str(), (uint32)m_glslCode.size());
Shader.setSourceFile(fmt::format("shader_{:016x}_{:016x}.glsl", m_baseHash, m_auxHash).c_str());
}
//auto beginTime = benchmarkTimer_start();
GlslangToSpv(*Program.getIntermediate(state), spirvBuffer, &logger, &spvOptions);
//double timeDur = benchmarkTimer_stop(beginTime);
//forceLogRemoveMe_printf("Shader GLSL-to-SPIRV compilation took %lfms Size %08x", timeDur, spirvBuffer.size()*4);
// store in cache, unless it got compiled with debug info or is a modified shader from a gfx pack
if (s_spirvCache && m_isGameShader && m_isGfxPackShader == false && !compileWithDebugInfo)
{
uint64 h1, h2;
GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2);
s_spirvCache->AddFile({ h1, h2 }, (const uint8*)spirvBuffer.data(), spirvBuffer.size() * sizeof(uint32));
}
CreateVkShaderModule(spirvBuffer);
// count compiled shader
if (!s_isLoadingShadersVk)
{
if( m_isGameShader )
++g_compiled_shaders_total;
}
FinishCompilation();
}
// Makes sure this shader is compiled before returning.
// If it is still waiting in the queue it is removed and compiled on the calling thread.
// Otherwise a worker already owns it and the call blocks until the state becomes DONE.
void RendererShaderVk::PreponeCompilation(bool isRenderThread)
{
	bool claimedFromQueue = false;
	{
		std::lock_guard<std::mutex> queueLock(ShaderVkThreadPool.s_compilationQueueMutex);
		claimedFromQueue = m_compilationState.hasState(COMPILATION_STATE::QUEUED);
		if (claimedFromQueue)
		{
			// remove from queue; the semaphore token stays behind and is absorbed by a worker's empty-queue check
			auto& pending = ShaderVkThreadPool.s_compilationQueue;
			pending.erase(std::remove(pending.begin(), pending.end(), this), pending.end());
			m_compilationState.setValue(COMPILATION_STATE::COMPILING);
		}
	}

	if (claimedFromQueue)
	{
		// compile synchronously
		CompileInternal(isRenderThread);
		m_compilationState.setValue(COMPILATION_STATE::DONE);
		return;
	}

	m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
	--g_compiled_shaders_async; // compilation caused a stall so we don't consider this one async
}
// Non-blocking check: returns true once the compilation state is DONE.
// NOTE: DONE is also set when CompileInternal() returned early after a compile error, so this does not guarantee a usable shader module.
bool RendererShaderVk::IsCompiled()
{
return m_compilationState.hasState(COMPILATION_STATE::DONE);
};
// Blocks until the compilation state becomes DONE. Always returns true.
// Unlike PreponeCompilation() this does not pull the shader out of the queue, it only waits.
bool RendererShaderVk::WaitForCompiled()
{
m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
return true;
}
// Opens the SPIR-V cache file for the given title and switches the backend into cache-loading mode.
// A previously opened cache is released first. If the cache cannot be opened, an error is logged and
// s_spirvCache stays nullptr; loading mode is enabled regardless.
void RendererShaderVk::ShaderCacheLoading_begin(uint64 cacheTitleId)
{
	// deleting a null pointer is a no-op, so no check is needed
	delete s_spirvCache;
	s_spirvCache = nullptr;

	const uint32 cacheMagic = GeneratePrecompiledCacheId();
	const std::string cacheFilename = fmt::format("{:016x}_spirv.bin", cacheTitleId);
	const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}", cacheFilename);

	s_spirvCache = FileCache::Open(cachePath, true, cacheMagic);
	if (!s_spirvCache)
		cemuLog_log(LogType::Force, "Unable to open SPIR-V cache {}", cacheFilename);

	s_isLoadingShadersVk = true;
}
// Leaves cache-loading mode. From here on CompileInternal() no longer reads from the SPIR-V cache.
void RendererShaderVk::ShaderCacheLoading_end()
{
// keep s_spirvCache open since we will write to it while the game is running
s_isLoadingShadersVk = false;
}
// Deletes the SPIR-V cache object and resets the pointer. Safe to call when no cache is open.
void RendererShaderVk::ShaderCacheLoading_Close()
{
delete s_spirvCache;
s_spirvCache = nullptr;
}