GX2+TCL: Reimplement command buffer submission

- GX2 utilizes TCL(.rpl) API for command submission instead of directly writing to an internal GPU fifo
- Submission & retire timestamps are correctly implemented as incremental counters
- Command buffering behaviour matches console
- Fixes race conditions on aarch64
This commit is contained in:
Exzap 2025-05-14 18:59:50 +02:00
parent 96765e4ac6
commit 28ea70b6d8
21 changed files with 761 additions and 472 deletions

View file

@ -2,21 +2,19 @@
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "Cafe/HW/Espresso/Const.h"
struct GX2WriteGatherPipeState
namespace GX2
{
uint8* gxRingBuffer;
// each core has it's own write gatherer and display list state (writing)
uint8* writeGatherPtrGxBuffer[Espresso::CORE_COUNT];
uint8** writeGatherPtrWrite[Espresso::CORE_COUNT];
uint8* writeGatherPtrDisplayList[Espresso::CORE_COUNT];
MPTR displayListStart[Espresso::CORE_COUNT];
uint32 displayListMaxSize[Espresso::CORE_COUNT];
struct GX2PerCoreCBState
{
uint32be* bufferPtr;
uint32 bufferSizeInU32s;
uint32be* currentWritePtr;
bool isDisplayList;
};
extern GX2PerCoreCBState s_perCoreCBState[Espresso::CORE_COUNT];
};
extern GX2WriteGatherPipeState gx2WriteGatherPipe;
void GX2ReserveCmdSpace(uint32 reservedFreeSpaceInU32); // move to GX2 namespace eventually
void gx2WriteGather_submitU32AsBE(uint32 v);
void gx2WriteGather_submitU32AsLE(uint32 v);
void gx2WriteGather_submitU32AsLEArray(uint32* v, uint32 numValues);
@ -27,7 +25,8 @@ uint32 PPCInterpreter_getCurrentCoreIndex();
template <typename ...Targs>
inline void gx2WriteGather_submit_(uint32 coreIndex, uint32be* writePtr)
{
(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) = (uint8*)writePtr;
GX2::s_perCoreCBState[coreIndex].currentWritePtr = writePtr;
cemu_assert_debug(GX2::s_perCoreCBState[coreIndex].currentWritePtr <= (GX2::s_perCoreCBState[coreIndex].bufferPtr + GX2::s_perCoreCBState[coreIndex].bufferSizeInU32s));
}
template <typename T, typename ...Targs>
@ -75,17 +74,23 @@ template <typename ...Targs>
inline void gx2WriteGather_submit(Targs... args)
{
uint32 coreIndex = PPCInterpreter_getCurrentCoreIndex();
if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == nullptr)
if (GX2::s_perCoreCBState[coreIndex].currentWritePtr == nullptr)
{
cemu_assert_suspicious(); // writing to command buffer without valid write pointer?
return;
uint32be* writePtr = (uint32be*)(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]);
}
uint32be* writePtr = GX2::s_perCoreCBState[coreIndex].currentWritePtr;
gx2WriteGather_submit_(coreIndex, writePtr, std::forward<Targs>(args)...);
}
namespace GX2
{
uint32 GX2WriteGather_getReadWriteDistance();
void GX2WriteGather_checkAndInsertWrapAroundMark();
void GX2Command_Flush(uint32 numU32sForNextBuffer, bool triggerMarkerInterrupt = true);
void GX2ReserveCmdSpace(uint32 reservedFreeSpaceInU32);
uint64 GX2GetLastSubmittedTimeStamp();
uint64 GX2GetRetiredTimeStamp();
bool GX2WaitTimeStamp(uint64 tsWait);
void GX2BeginDisplayList(MEMPTR<void> displayListAddr, uint32 size);
void GX2BeginDisplayListEx(MEMPTR<void> displayListAddr, uint32 size, bool profiling);
@ -96,7 +101,8 @@ namespace GX2
bool GX2GetDisplayListWriteStatus();
void GX2Init_writeGather();
void GX2CommandInit();
void GX2Init_commandBufferPool(void* bufferBase, uint32 bufferSize);
void GX2Shutdown_commandBufferPool();
void GX2CommandResetToDefaultState();
}