// Cemu/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
#include "IML.h"
#include "../PPCRecompiler.h"
#include "../PPCRecompilerIml.h"
#include "IMLRegisterAllocator.h"
#include "IMLRegisterAllocatorRanges.h"
#include "../BackendX64/BackendX64.h"
#include <boost/container/static_vector.hpp>
#include <boost/container/small_vector.hpp>
#include "Common/cpu_features.h"
#define DEBUG_RA_EXTRA_VALIDATION 0 // if set to non-zero, additional expensive validation checks will be performed
#define DEBUG_RA_INSTRUCTION_GEN 0
struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment
{
IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd)
: regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {};
void TrackInstruction(sint32 index)
{
usageStart = std::min<sint32>(usageStart, index);
usageEnd = std::max<sint32>(usageEnd, index + 1); // exclusive index
}
sint32 usageStart;
sint32 usageEnd;
bool isProcessed{false};
IMLRegFormat regBaseFormat;
};
struct IMLRegisterAllocatorContext
{
IMLRegisterAllocatorParameters* raParam;
ppcImlGenContext_t* deprGenContext; // deprecated. Try to decouple IMLRA from other parts of IML/PPCRec
std::unordered_map<IMLRegID, IMLRegFormat> regIdToBaseFormat;
// first pass
std::vector<std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>> perSegmentAbstractRanges;
// helper methods
inline std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>& GetSegmentAbstractRangeMap(IMLSegment* imlSegment)
{
return perSegmentAbstractRanges[imlSegment->momentaryIndex];
}
inline IMLRegFormat GetBaseFormatByRegId(IMLRegID regId) const
{
auto it = regIdToBaseFormat.find(regId);
cemu_assert_debug(it != regIdToBaseFormat.cend());
return it->second;
}
};
struct IMLFixedRegisters
{
struct Entry
{
Entry(IMLReg reg, IMLPhysRegisterSet physRegSet)
: reg(reg), physRegSet(physRegSet) {}
IMLReg reg;
IMLPhysRegisterSet physRegSet;
};
boost::container::small_vector<Entry, 4> listInput; // fixed register requirements for instruction input edge
boost::container::small_vector<Entry, 4> listOutput; // fixed register requirements for instruction output edge
};
static void SetupCallingConvention(const IMLInstruction* instruction, IMLFixedRegisters& fixedRegs, const IMLPhysReg intParamToPhysReg[3], const IMLPhysReg floatParamToPhysReg[3], const IMLPhysReg intReturnPhysReg, const IMLPhysReg floatReturnPhysReg, IMLPhysRegisterSet volatileRegisters)
{
sint32 numIntParams = 0, numFloatParams = 0;
auto AddParameterMapping = [&](IMLReg reg) {
if (!reg.IsValid())
return;
if (reg.GetBaseFormat() == IMLRegFormat::I64)
{
IMLPhysRegisterSet ps;
ps.SetAvailable(intParamToPhysReg[numIntParams]);
fixedRegs.listInput.emplace_back(reg, ps);
numIntParams++;
}
else if (reg.GetBaseFormat() == IMLRegFormat::F64)
{
IMLPhysRegisterSet ps;
ps.SetAvailable(floatParamToPhysReg[numFloatParams]);
fixedRegs.listInput.emplace_back(reg, ps);
numFloatParams++;
}
else
{
cemu_assert_suspicious();
}
};
AddParameterMapping(instruction->op_call_imm.regParam0);
AddParameterMapping(instruction->op_call_imm.regParam1);
AddParameterMapping(instruction->op_call_imm.regParam2);
// return value
if (instruction->op_call_imm.regReturn.IsValid())
{
IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat();
bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8;
IMLPhysRegisterSet ps;
if (isIntegerFormat)
{
ps.SetAvailable(intReturnPhysReg);
volatileRegisters.SetReserved(intReturnPhysReg);
}
else
{
ps.SetAvailable(floatReturnPhysReg);
volatileRegisters.SetReserved(floatReturnPhysReg);
}
fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps);
}
// block volatile registers from being used on the output edge; this makes the register allocator store them during the call
fixedRegs.listOutput.emplace_back(IMLREG_INVALID, volatileRegisters);
}
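// Illustrative example (restating the mapping above, no additional behavior): on x86-64 a
// call with two I64 parameters and an I64 return value yields
//   listInput  = { (regParam0 -> RCX), (regParam1 -> RDX) }
//   listOutput = { (regReturn -> RAX), (IMLREG_INVALID -> volatile set minus RAX) }
// where the IMLREG_INVALID entry reserves the remaining volatile registers across the call.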
#if defined(__aarch64__)
// aarch64
static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
{
fixedRegs.listInput.clear();
fixedRegs.listOutput.clear();
// code below for aarch64 has not been tested
// The purpose of GetInstructionFixedRegisters() is to constrain virtual registers to specific physical registers for instructions which need it
// on x86 this is used for instructions like SHL <reg>, CL where the CL register is hardwired. On AArch64 it's probably only necessary for setting up the calling convention
cemu_assert_unimplemented();
#if 0
if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
{
const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2};
const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2};
IMLPhysRegisterSet volatileRegs;
for (int i=0; i<19; i++) // x0 to x18 are volatile
volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_GPR_BASE + i);
for (int i = 0; i <= 31; i++) // which float registers are volatile?
volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i);
SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, volatileRegs);
}
#endif
}
#else
// x86-64
static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
{
fixedRegs.listInput.clear();
fixedRegs.listOutput.clear();
if (instruction->type == PPCREC_IML_TYPE_R_R_R)
{
if (instruction->operation == PPCREC_IML_OP_LEFT_SHIFT || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
{
if(!g_CPUFeatures.x86.bmi2)
{
IMLPhysRegisterSet ps;
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_ECX);
fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ps);
}
}
}
else if (instruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
IMLPhysRegisterSet ps;
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX);
fixedRegs.listInput.emplace_back(IMLREG_INVALID, ps); // none of the inputs may use EAX
fixedRegs.listOutput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); // but we output to EAX
}
else if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
{
const IMLPhysReg intParamToPhysReg[3] = {IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8};
const IMLPhysReg floatParamToPhysReg[3] = {IMLArchX86::PHYSREG_FPR_BASE + 0, IMLArchX86::PHYSREG_FPR_BASE + 1, IMLArchX86::PHYSREG_FPR_BASE + 2};
IMLPhysRegisterSet volatileRegs;
volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX);
volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8);
volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9);
volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10);
volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11);
// YMM0-YMM5 are volatile
for (int i = 0; i <= 5; i++)
volatileRegs.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + i);
// for YMM6-YMM15 only the upper 128 bits are volatile, which we don't use
SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX, IMLArchX86::PHYSREG_FPR_BASE + 0, volatileRegs);
}
}
#endif
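// Example: without BMI2, x86-64 "SHL regA, CL" hardwires the shift count to CL, so the code
// above pins op_r_r_r.regB to ECX on the instruction's input edge. With BMI2 the backend can
// use the flexible shift forms (SHLX and friends) instead, so no constraint is generated.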
uint32 IMLRA_GetNextIterationIndex()
{
static uint32 recRACurrentIterationIndex = 0;
recRACurrentIterationIndex++;
return recRACurrentIterationIndex;
}
bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex, IMLSegment* imlSegmentLoopBase)
{
if (currentSegment == imlSegmentLoopBase)
return true;
if (currentSegment->raInfo.lastIterationIndex == iterationIndex)
return currentSegment->raInfo.isPartOfProcessedLoop;
if (depth >= 9)
return false;
currentSegment->raInfo.lastIterationIndex = iterationIndex;
currentSegment->raInfo.isPartOfProcessedLoop = false;
if (currentSegment->nextSegmentIsUncertain)
return false;
if (currentSegment->nextSegmentBranchNotTaken)
{
if (currentSegment->nextSegmentBranchNotTaken->momentaryIndex > currentSegment->momentaryIndex)
{
currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
}
}
if (currentSegment->nextSegmentBranchTaken)
{
if (currentSegment->nextSegmentBranchTaken->momentaryIndex > currentSegment->momentaryIndex)
{
currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
}
}
if (currentSegment->raInfo.isPartOfProcessedLoop)
currentSegment->loopDepth++;
return currentSegment->raInfo.isPartOfProcessedLoop;
}
void IMLRA_DetectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegmentLoopBase)
{
uint32 iterationIndex = IMLRA_GetNextIterationIndex();
imlSegmentLoopBase->raInfo.lastIterationIndex = iterationIndex;
if (_detectLoop(imlSegmentLoopBase->nextSegmentBranchTaken, 0, iterationIndex, imlSegmentLoopBase))
{
imlSegmentLoopBase->loopDepth++;
}
}
void IMLRA_IdentifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
if (imlSegment->nextSegmentIsUncertain)
return;
// check if this segment has a branch that links to itself (tight loop)
if (imlSegment->nextSegmentBranchTaken == imlSegment)
{
// segment loops over itself
imlSegment->loopDepth++;
return;
}
// check if this segment has a branch that goes backwards (potential complex loop)
if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->momentaryIndex < imlSegment->momentaryIndex)
{
IMLRA_DetectLoop(ppcImlGenContext, imlSegment);
}
}
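// Worked example: for segments S0 -> S1 -> S2 where S2's taken branch targets S0, the
// backward edge is detected here and IMLRA_DetectLoop walks forward from S0 (following only
// edges that increase momentaryIndex) until it reaches the loop base S2 again; every segment
// on the cycle gets its loopDepth incremented. IMLRA_AssignRegisters later allocates
// higher-loopDepth segments first.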
#define SUBRANGE_LIST_SIZE (128)
sint32 IMLRA_CountDistanceUntilNextUse(raLivenessRange* subrange, raInstructionEdge startPosition)
{
for (sint32 i = 0; i < subrange->list_accessLocations.size(); i++)
{
if (subrange->list_accessLocations[i].pos >= startPosition)
{
auto& it = subrange->list_accessLocations[i];
cemu_assert_debug(it.IsRead() != it.IsWrite()); // an access location can be either read or write
cemu_assert_debug(!startPosition.ConnectsToPreviousSegment() && !startPosition.ConnectsToNextSegment());
return it.pos.GetRaw() - startPosition.GetRaw();
}
}
cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000);
return 10001 * 2; // no further access in this segment; return a raw distance larger than any segment can span (see assert above)
}
// returns -1 if there is no fixed register requirement on or after startPosition
sint32 IMLRA_CountDistanceUntilFixedRegUsageInRange(IMLSegment* imlSegment, raLivenessRange* range, raInstructionEdge startPosition, sint32 physRegister, bool& hasFixedAccess)
{
hasFixedAccess = false;
cemu_assert_debug(startPosition.IsInstructionIndex());
for (auto& fixedReqEntry : range->list_fixedRegRequirements)
{
if (fixedReqEntry.pos < startPosition)
continue;
if (fixedReqEntry.allowedReg.IsAvailable(physRegister))
{
hasFixedAccess = true;
return fixedReqEntry.pos.GetRaw() - startPosition.GetRaw();
}
}
cemu_assert_debug(range->interval.end.IsInstructionIndex());
return range->interval.end.GetRaw() - startPosition.GetRaw();
}
sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 maxDistance, IMLRegID ourRegId, sint32 physRegister)
{
cemu_assert_debug(startPosition.IsInstructionIndex());
raInstructionEdge lastPos2;
lastPos2.Set(imlSegment->imlList.size(), false);
raInstructionEdge endPos;
endPos = startPosition + maxDistance;
if (endPos > lastPos2)
endPos = lastPos2;
IMLFixedRegisters fixedRegs;
if (startPosition.IsOnOutputEdge())
GetInstructionFixedRegisters(imlSegment->imlList.data() + startPosition.GetInstructionIndex(), fixedRegs);
for (raInstructionEdge currentPos = startPosition; currentPos <= endPos; ++currentPos)
{
if (currentPos.IsOnInputEdge())
{
GetInstructionFixedRegisters(imlSegment->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs);
}
auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
for (auto& fixedRegLoc : fixedRegAccess)
{
if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
{
cemu_assert_debug(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider. Except for IMLREG_INVALID which is used to indicate reserved registers
if (fixedRegLoc.physRegSet.IsAvailable(physRegister))
return currentPos.GetRaw() - startPosition.GetRaw();
}
}
}
return endPos.GetRaw() - startPosition.GetRaw();
}
// count how many instructions there are until physRegister is used by any subrange or reserved for any fixed register requirement (returns 0 if register is in use at startIndex)
sint32 PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 physRegister)
{
cemu_assert_debug(startPosition.IsInstructionIndex());
sint32 minDistance = (sint32)imlSegment->imlList.size() * 2 - startPosition.GetRaw();
// next
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr)
{
if (subrangeItr->GetPhysicalRegister() != physRegister)
{
subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue;
}
if (subrangeItr->interval.ContainsEdge(startPosition))
return 0;
if (subrangeItr->interval.end < startPosition)
{
subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue;
}
cemu_assert_debug(startPosition <= subrangeItr->interval.start);
sint32 currentDist = subrangeItr->interval.start.GetRaw() - startPosition.GetRaw();
minDistance = std::min(minDistance, currentDist);
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
return minDistance;
}
struct IMLRALivenessTimeline
{
IMLRALivenessTimeline()
{
}
// manually add an active range
void AddActiveRange(raLivenessRange* subrange)
{
activeRanges.emplace_back(subrange);
}
void ExpireRanges(raInstructionEdge expireUpTo)
{
expiredRanges.clear();
size_t count = activeRanges.size();
for (size_t f = 0; f < count; f++)
{
raLivenessRange* liverange = activeRanges[f];
if (liverange->interval.end < expireUpTo) // this was <= but since end is not inclusive we need to use <
{
#ifdef CEMU_DEBUG_ASSERT
if (!expireUpTo.ConnectsToNextSegment() && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken))
assert_dbg(); // infinite subranges should not expire
#endif
expiredRanges.emplace_back(liverange);
// remove entry by swapping in the last element (order of activeRanges is not preserved)
activeRanges[f] = activeRanges[count - 1];
f--;
count--;
}
}
if (count != activeRanges.size())
activeRanges.resize(count);
}
std::span<raLivenessRange*> GetExpiredRanges()
{
return {expiredRanges.data(), expiredRanges.size()};
}
std::span<raLivenessRange*> GetActiveRanges()
{
return {activeRanges.data(), activeRanges.size()};
}
raLivenessRange* GetActiveRangeByVirtualRegId(IMLRegID regId)
{
for (auto& it : activeRanges)
if (it->virtualRegister == regId)
return it;
return nullptr;
}
raLivenessRange* GetActiveRangeByPhysicalReg(sint32 physReg)
{
cemu_assert_debug(physReg >= 0);
for (auto& it : activeRanges)
if (it->physicalRegister == physReg)
return it;
return nullptr;
}
boost::container::small_vector<raLivenessRange*, 64> activeRanges;
private:
boost::container::small_vector<raLivenessRange*, 16> expiredRanges;
};
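// Minimal usage sketch for the timeline above (assumes the linked list is already sorted by
// interval.start, as done by _sortSegmentAllSubrangesLinkedList below; kept out of the build):
#if 0
static void ExampleTimelineScan(IMLSegment* seg)
{
	IMLRALivenessTimeline timeline;
	for (raLivenessRange* r = seg->raInfo.linkedList_allSubranges; r; r = r->link_allSegmentRanges.next)
	{
		timeline.ExpireRanges(r->interval.start); // retire ranges that ended before this start
		timeline.AddActiveRange(r);               // ranges left in timeline.activeRanges overlap r's start
	}
}
#endif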
// mark registers occupied by any overlapping range as unavailable in physRegSet
void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet)
{
auto clusterRanges = range2->GetAllSubrangesInCluster();
for (auto& subrange : clusterRanges)
{
IMLSegment* imlSegment = subrange->imlSegment;
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr)
{
if (subrange == subrangeItr)
{
// next
subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue;
}
if (subrange->interval.IsOverlapping(subrangeItr->interval))
{
if (subrangeItr->GetPhysicalRegister() >= 0)
physRegSet.SetReserved(subrangeItr->GetPhysicalRegister());
}
// next
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
}
}
bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs)
{
return lhs->interval.start < rhs->interval.start;
}
void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
{
raLivenessRange* subrangeList[4096 + 1]; // stack-allocated scratch buffer; the extra slot holds the null terminator written during relinking below
sint32 count = 0;
// disassemble linked list
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr)
{
cemu_assert(count < 4096);
subrangeList[count] = subrangeItr;
count++;
// next
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
if (count == 0)
{
imlSegment->raInfo.linkedList_allSubranges = nullptr;
return;
}
// sort
std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare);
// reassemble linked list
subrangeList[count] = nullptr;
imlSegment->raInfo.linkedList_allSubranges = subrangeList[0];
subrangeList[0]->link_allSegmentRanges.prev = nullptr;
subrangeList[0]->link_allSegmentRanges.next = subrangeList[1];
for (sint32 i = 1; i < count; i++)
{
subrangeList[i]->link_allSegmentRanges.prev = subrangeList[i - 1];
subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1];
}
// validate list
#if DEBUG_RA_EXTRA_VALIDATION
sint32 count2 = 0;
subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
raInstructionEdge currentStartPosition;
currentStartPosition.SetRaw(RA_INTER_RANGE_START);
while (subrangeItr)
{
count2++;
if (subrangeItr->interval.start < currentStartPosition)
assert_dbg();
currentStartPosition = subrangeItr->interval.start;
// next
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
if (count != count2)
assert_dbg();
#endif
}
std::unordered_map<IMLRegID, raLivenessRange*>& IMLRA_GetSubrangeMap(IMLSegment* imlSegment)
{
return imlSegment->raInfo.linkedList_perVirtualRegister;
}
raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId)
{
auto it = imlSegment->raInfo.linkedList_perVirtualRegister.find(regId);
if (it == imlSegment->raInfo.linkedList_perVirtualRegister.end())
return nullptr;
return it->second;
}
struct raFixedRegRequirementWithVGPR
{
raFixedRegRequirementWithVGPR(raInstructionEdge pos, IMLPhysRegisterSet allowedReg, IMLRegID regId)
: pos(pos), allowedReg(allowedReg), regId(regId) {}
raInstructionEdge pos;
IMLPhysRegisterSet allowedReg;
IMLRegID regId;
};
std::vector<raFixedRegRequirementWithVGPR> IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment)
{
std::vector<raFixedRegRequirementWithVGPR> frrList;
size_t index = 0;
while (index < imlSegment->imlList.size())
{
IMLFixedRegisters fixedRegs;
GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
raInstructionEdge pos;
pos.Set(index, true);
for (auto& fixedRegAccess : fixedRegs.listInput)
{
frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
}
pos = pos + 1;
for (auto& fixedRegAccess : fixedRegs.listOutput)
{
frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
}
index++;
}
return frrList;
}
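// Example of the produced list: a segment whose only instruction (index 0) is an
// ATOMIC_CMP_STORE yields two entries:
//   { pos = input edge of 0,  allowedReg = {EAX}, regId = IMLRegID_INVALID } // inputs must avoid EAX
//   { pos = output edge of 0, allowedReg = {EAX}, regId = regBoolOut's id }  // result lands in EAX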
boost::container::small_vector<raLivenessRange*, 8> IMLRA_GetRangeWithFixedRegReservationOverlappingPos(IMLSegment* imlSegment, raInstructionEdge pos, IMLPhysReg physReg)
{
boost::container::small_vector<raLivenessRange*, 8> rangeList;
for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
{
if (!currentRange->interval.ContainsEdge(pos))
continue;
IMLPhysRegisterSet allowedRegs;
if (!currentRange->GetAllowedRegistersEx(allowedRegs))
continue;
if (allowedRegs.IsAvailable(physReg))
rangeList.emplace_back(currentRange);
}
return rangeList;
}
void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
// first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border
// todo - this pass currently creates suboptimal results by splitting all ranges that cross the segment border if they have any fixed register requirement. This can be avoided in some cases
for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;)
{
IMLPhysRegisterSet allowedRegs;
if(currentRange->list_fixedRegRequirements.empty())
{
currentRange = currentRange->link_allSegmentRanges.next;
continue; // since we run this pass for every segment we don't need to do global checks here for clusters, which may not even have fixed register requirements
}
if (!currentRange->GetAllowedRegistersEx(allowedRegs))
{
currentRange = currentRange->link_allSegmentRanges.next;
continue;
}
if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment())
{
raLivenessRange* nextRange = currentRange->link_allSegmentRanges.next;
IMLRA_ExplodeRangeCluster(ppcImlGenContext, currentRange);
currentRange = nextRange;
continue;
}
currentRange = currentRange->link_allSegmentRanges.next;
}
// second pass - look for ranges with conflicting fixed register requirements and split these too (locally)
for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
{
IMLPhysRegisterSet allowedRegs;
if (currentRange->list_fixedRegRequirements.empty())
continue; // we don't need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
if (!currentRange->GetAllowedRegistersEx(allowedRegs))
continue;
if (allowedRegs.HasAnyAvailable())
continue;
cemu_assert_unimplemented();
}
// third pass - assign fixed registers, split ranges if needed
std::vector<raFixedRegRequirementWithVGPR> frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment);
std::unordered_map<IMLPhysReg, IMLRegID> lastVGPR;
for (size_t i = 0; i < frr.size(); i++)
{
raFixedRegRequirementWithVGPR& entry = frr[i];
// we currently only handle fixed register requirements with a single register
// with one exception: When regId is IMLRegID_INVALID then the entry acts as a list of reserved registers
cemu_assert_debug(entry.regId == IMLRegID_INVALID || entry.allowedReg.HasExactlyOneAvailable());
for (IMLPhysReg physReg = entry.allowedReg.GetFirstAvailableReg(); physReg >= 0; physReg = entry.allowedReg.GetNextAvailableReg(physReg + 1))
{
// check if the assigned vGPR has changed
bool vgprHasChanged = false;
auto it = lastVGPR.find(physReg);
if (it != lastVGPR.end())
vgprHasChanged = it->second != entry.regId;
else
vgprHasChanged = true;
lastVGPR[physReg] = entry.regId;
if (!vgprHasChanged)
continue;
boost::container::small_vector<raLivenessRange*, 8> overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg);
if (entry.regId != IMLRegID_INVALID)
cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers
for (auto& range : overlappingRanges)
{
if (range->interval.start < entry.pos)
{
IMLRA_SplitRange(ppcImlGenContext, range, entry.pos, true);
}
}
}
}
// finally iterate ranges and assign fixed registers
for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
{
IMLPhysRegisterSet allowedRegs;
if (currentRange->list_fixedRegRequirements.empty())
continue; // we don't need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
if (!currentRange->GetAllowedRegistersEx(allowedRegs))
{
cemu_assert_debug(currentRange->list_fixedRegRequirements.empty());
continue;
}
cemu_assert_debug(allowedRegs.HasExactlyOneAvailable());
currentRange->SetPhysicalRegister(allowedRegs.GetFirstAvailableReg());
}
// DEBUG - check for collisions and make sure all ranges with fixed register requirements got their physical register assigned
#if DEBUG_RA_EXTRA_VALIDATION
for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
{
IMLPhysRegisterSet allowedRegs;
if (!currentRange->HasPhysicalRegister())
continue;
for (raLivenessRange* currentRange2 = imlSegment->raInfo.linkedList_allSubranges; currentRange2; currentRange2 = currentRange2->link_allSegmentRanges.next)
{
if (currentRange == currentRange2)
continue;
if (currentRange->interval.IsOverlapping(currentRange2->interval))
{
cemu_assert_debug(currentRange->GetPhysicalRegister() != currentRange2->GetPhysicalRegister());
}
}
}
#endif
}
// we should not split ranges on instructions with tied registers (i.e. where a register encoded as a single parameter is both input and output)
// otherwise the RA algorithm has to assign both ranges the same physical register (not supported yet) and the point of splitting to fit another range is nullified
void IMLRA_MakeSafeSplitPosition(IMLSegment* imlSegment, raInstructionEdge& pos)
{
// we ignore the instruction for now and just always make it a safe split position
cemu_assert_debug(pos.IsInstructionIndex());
if (pos.IsOnOutputEdge())
pos = pos - 1;
}
// convenience wrapper for IMLRA_MakeSafeSplitPosition
void IMLRA_MakeSafeSplitDistance(IMLSegment* imlSegment, raInstructionEdge startPos, sint32& distance)
{
cemu_assert_debug(startPos.IsInstructionIndex());
cemu_assert_debug(distance >= 0);
raInstructionEdge endPos = startPos + distance;
IMLRA_MakeSafeSplitPosition(imlSegment, endPos);
if (endPos < startPos)
{
distance = 0;
return;
}
distance = endPos.GetRaw() - startPos.GetRaw();
}
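// Edge arithmetic example (using the raInstructionEdge convention seen throughout this file:
// instruction n occupies raw position 2n for its input edge and 2n+1 for its output edge):
// if pos is the output edge of instruction 5 (raw 11), IMLRA_MakeSafeSplitPosition moves it
// to the input edge of instruction 5 (raw 10), so a split never separates an instruction's
// input edge from its output edge, where a tied register pair would need both halves in the
// same physical register.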
static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx);
class RASpillStrategy
{
public:
virtual void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) = 0;
sint32 GetCost()
{
return strategyCost;
}
protected:
void ResetCost()
{
strategyCost = INT_MAX;
}
sint32 strategyCost;
};
class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy
{
public:
void Reset()
{
localRangeHoleCutting.distance = -1;
localRangeHoleCutting.largestHoleSubrange = nullptr;
ResetCost();
}
void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
{
raInstructionEdge currentRangeStart = currentRange->interval.start;
sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
cemu_assert_debug(localRangeHoleCutting.distance == -1);
cemu_assert_debug(strategyCost == INT_MAX);
if (!currentRangeStart.ConnectsToPreviousSegment())
{
cemu_assert_debug(currentRangeStart.GetRaw() >= 0);
for (auto candidate : timeline.activeRanges)
{
if (candidate->interval.ExtendsIntoNextSegment())
continue;
// new checks (Oct 2024):
if (candidate == currentRange)
continue;
if (candidate->GetPhysicalRegister() < 0)
continue;
if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
continue;
sint32 distance2 = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart);
IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance2);
if (distance2 < 2)
continue;
cemu_assert_debug(currentRangeStart.IsInstructionIndex());
distance2 = std::min<sint32>(distance2, imlSegment->imlList.size() * 2 - currentRangeStart.GetRaw()); // limit distance to end of segment
// calculate split cost of candidate
sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2);
// calculate additional split cost of currentRange if hole is not large enough
if (distance2 < requiredSize2)
{
cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2);
// we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes)
cost += (requiredSize2 - distance2) / 10;
}
// compare cost with previous candidates
if (cost < strategyCost)
{
strategyCost = cost;
localRangeHoleCutting.distance = distance2;
localRangeHoleCutting.largestHoleSubrange = candidate;
}
}
}
}
void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
{
cemu_assert_debug(strategyCost != INT_MAX);
sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
raInstructionEdge currentRangeStart = currentRange->interval.start;
raInstructionEdge holeStartPosition = currentRangeStart;
raInstructionEdge holeEndPosition = currentRangeStart + localRangeHoleCutting.distance;
raLivenessRange* collisionRange = localRangeHoleCutting.largestHoleSubrange;
if (collisionRange->interval.start < holeStartPosition)
{
collisionRange = IMLRA_SplitRange(nullptr, collisionRange, holeStartPosition, true);
cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeStartPosition); // verify if splitting worked at all, tail must be on or after the split point
cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough
}
else
{
cemu_assert_unimplemented(); // we still need to trim?
}
// we may also have to cut the current range to fit partially into the hole
if (requiredSize2 > localRangeHoleCutting.distance)
{
raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + localRangeHoleCutting.distance, true);
if (tailRange)
{
cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
tailRange->UnsetPhysicalRegister();
}
}
// verify that the hole is large enough
if (collisionRange)
{
cemu_assert_debug(!collisionRange->interval.IsOverlapping(currentRange->interval));
}
}
private:
struct
{
sint32 distance;
raLivenessRange* largestHoleSubrange;
} localRangeHoleCutting;
};
class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy
{
// split current range (this is generally only a good choice when the current range is long but has few usages)
public:
void Reset()
{
ResetCost();
availableRegisterHole.distance = -1;
availableRegisterHole.physRegister = -1;
}
void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& localAvailableRegsMask, const IMLPhysRegisterSet& allowedRegs)
{
sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
raInstructionEdge currentRangeStart = currentRange->interval.start;
cemu_assert_debug(strategyCost == INT_MAX);
availableRegisterHole.distance = -1;
availableRegisterHole.physRegister = -1;
if (currentRangeStart.GetRaw() >= 0)
{
if (localAvailableRegsMask.HasAnyAvailable())
{
sint32 physRegItr = -1;
while (true)
{
physRegItr = localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1);
if (physRegItr < 0)
break;
if (!allowedRegs.IsAvailable(physRegItr))
continue;
// get size of potential hole for this register
sint32 distance = PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(imlSegment, currentRangeStart, physRegItr);
// some instructions may require the same register for another range, check the distance here
sint32 distUntilFixedReg = IMLRA_CountDistanceUntilFixedRegUsage(imlSegment, currentRangeStart, distance, currentRange->GetVirtualRegister(), physRegItr);
if (distUntilFixedReg < distance)
distance = distUntilFixedReg;
IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance);
if (distance < 2)
continue;
// calculate additional cost due to split
cemu_assert_debug(distance < requiredSize2); // should always be true otherwise previous step would have selected this register?
sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
// add small additional cost for the remaining range (prefer larger holes)
cost += ((requiredSize2 - distance) / 2) / 10;
if (cost < strategyCost)
{
strategyCost = cost;
availableRegisterHole.distance = distance;
availableRegisterHole.physRegister = physRegItr;
}
}
}
}
}
void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
{
cemu_assert_debug(strategyCost != INT_MAX);
raInstructionEdge currentRangeStart = currentRange->interval.start;
// use available register
raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + availableRegisterHole.distance, true);
if (tailRange)
{
cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
tailRange->UnsetPhysicalRegister();
}
}
private:
struct
{
sint32 physRegister;
sint32 distance; // size of hole
} availableRegisterHole;
};
class RASpillStrategy_ExplodeRange : public RASpillStrategy
{
public:
void Reset()
{
ResetCost();
explodeRange.range = nullptr;
explodeRange.distance = -1;
}
void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
{
raInstructionEdge currentRangeStart = currentRange->interval.start;
if (currentRangeStart.ConnectsToPreviousSegment())
currentRangeStart.Set(0, true);
sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
cemu_assert_debug(strategyCost == INT_MAX);
explodeRange.range = nullptr;
explodeRange.distance = -1;
for (auto candidate : timeline.activeRanges)
{
if (!candidate->interval.ExtendsIntoNextSegment())
continue;
// new checks (Oct 2024):
if (candidate == currentRange)
continue;
if (candidate->GetPhysicalRegister() < 0)
continue;
if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
continue;
sint32 distance = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart);
IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance);
if (distance < 2)
continue;
sint32 cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate);
// if the hole is not large enough, add cost of splitting current subrange
if (distance < requiredSize2)
{
cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
// add small additional cost for the remaining range (prefer larger holes)
cost += ((requiredSize2 - distance) / 2) / 10;
}
// compare with current best candidate for this strategy
if (cost < strategyCost)
{
strategyCost = cost;
explodeRange.distance = distance;
explodeRange.range = candidate;
}
}
}
void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
{
raInstructionEdge currentRangeStart = currentRange->interval.start;
if (currentRangeStart.ConnectsToPreviousSegment())
currentRangeStart.Set(0, true);
sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
// explode range
IMLRA_ExplodeRangeCluster(nullptr, explodeRange.range);
// split current subrange if necessary
if (requiredSize2 > explodeRange.distance)
{
raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + explodeRange.distance, true);
if (tailRange)
{
cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
tailRange->UnsetPhysicalRegister();
}
}
}
private:
struct
{
raLivenessRange* range;
sint32 distance; // size of hole
// note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange
} explodeRange;
};
class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy
{
public:
void Reset()
{
ResetCost();
explodeRange.range = nullptr;
explodeRange.distance = -1;
}
void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
{
// explode the range with the least cost
cemu_assert_debug(strategyCost == INT_MAX);
cemu_assert_debug(explodeRange.range == nullptr && explodeRange.distance == -1);
for (auto candidate : timeline.activeRanges)
{
if (!candidate->interval.ExtendsIntoNextSegment())
continue;
// only select candidates that clash with current subrange
if (candidate->GetPhysicalRegister() < 0 && candidate != currentRange)
continue;
// and also filter any that don't meet fixed register requirements
if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
continue;
sint32 cost;
cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate);
// compare with current best candidate for this strategy
if (cost < strategyCost)
{
strategyCost = cost;
explodeRange.distance = INT_MAX;
explodeRange.range = candidate;
}
}
// add current range as a candidate too
sint32 ownCost;
ownCost = IMLRA_CalculateAdditionalCostOfRangeExplode(currentRange);
if (ownCost < strategyCost)
{
strategyCost = ownCost;
explodeRange.distance = INT_MAX;
explodeRange.range = currentRange;
}
}
void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
{
cemu_assert_debug(strategyCost != INT_MAX);
IMLRA_ExplodeRangeCluster(ctx, explodeRange.range);
}
private:
struct
{
raLivenessRange* range;
sint32 distance; // size of hole
// note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange
} explodeRange;
};
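// Summary of the strategies above: when no register is free for a whole range,
// LocalRangeHoleCutting splits an already-assigned local range to open a hole,
// AvailableRegisterHole splits the current range so its head fits into a temporarily free
// register, ExplodeRange breaks an overlapping inter-segment cluster apart, and
// ExplodeRangeInter does the same when the current range itself crosses into following
// segments. IMLRA_AssignSegmentRegisters below evaluates the applicable strategies and
// applies the one with the lowest cost.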
// filter any registers from candidatePhysRegSet which cannot be used by currentRange due to fixed register requirements within the range that it occupies
void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocatorContext& ctx, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet)
{
IMLSegment* seg = currentRange->imlSegment;
if (seg->imlList.empty())
return; // there can be no fixed register requirements if there are no instructions
raInstructionEdge firstPos = currentRange->interval.start;
if (currentRange->interval.start.ConnectsToPreviousSegment())
firstPos.SetRaw(0);
else if (currentRange->interval.start.ConnectsToNextSegment())
firstPos.Set(seg->imlList.size() - 1, false);
raInstructionEdge lastPos = currentRange->interval.end;
if (currentRange->interval.end.ConnectsToPreviousSegment())
lastPos.SetRaw(0);
else if (currentRange->interval.end.ConnectsToNextSegment())
lastPos.Set(seg->imlList.size() - 1, false);
cemu_assert_debug(firstPos <= lastPos);
IMLRegID ourRegId = currentRange->GetVirtualRegister();
IMLFixedRegisters fixedRegs;
if (firstPos.IsOnOutputEdge())
GetInstructionFixedRegisters(seg->imlList.data() + firstPos.GetInstructionIndex(), fixedRegs);
for (raInstructionEdge currentPos = firstPos; currentPos <= lastPos; ++currentPos)
{
if (currentPos.IsOnInputEdge())
{
GetInstructionFixedRegisters(seg->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs);
}
auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
for (auto& fixedRegLoc : fixedRegAccess)
{
if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet);
}
}
}
// filter out any registers along the range cluster
void IMLRA_FilterReservedFixedRegisterRequirementsForCluster(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet)
{
cemu_assert_debug(currentRange->imlSegment == imlSegment);
if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment())
{
auto clusterRanges = currentRange->GetAllSubrangesInCluster();
for (auto& rangeIt : clusterRanges)
{
IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, rangeIt, candidatePhysRegSet);
if (!candidatePhysRegSet.HasAnyAvailable())
break;
}
return;
}
IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, currentRange, candidatePhysRegSet);
}
bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
// sort subranges ascending by start index
_sortSegmentAllSubrangesLinkedList(imlSegment);
IMLRALivenessTimeline livenessTimeline;
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
raInstructionEdge lastInstructionEdge;
lastInstructionEdge.SetRaw(RA_INTER_RANGE_END);
struct
{
RASpillStrategy_LocalRangeHoleCutting localRangeHoleCutting;
RASpillStrategy_AvailableRegisterHole availableRegisterHole;
RASpillStrategy_ExplodeRange explodeRange;
// for ranges that connect to follow up segments:
RASpillStrategy_ExplodeRangeInter explodeRangeInter;
} strategy;
while (subrangeItr)
{
raInstructionEdge currentRangeStart = subrangeItr->interval.start; // used to be currentIndex before refactor
PPCRecRA_debugValidateSubrange(subrangeItr);
livenessTimeline.ExpireRanges((currentRangeStart > lastInstructionEdge) ? lastInstructionEdge : currentRangeStart); // expire up to currentIndex (inclusive), but exclude infinite ranges
// if subrange already has register assigned then add it to the active list and continue
if (subrangeItr->GetPhysicalRegister() >= 0)
{
// verify if register is actually available
#if DEBUG_RA_EXTRA_VALIDATION
for (auto& liverangeItr : livenessTimeline.activeRanges)
{
// check for register mismatch
cemu_assert_debug(liverangeItr->GetPhysicalRegister() != subrangeItr->GetPhysicalRegister());
}
#endif
livenessTimeline.AddActiveRange(subrangeItr);
subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue;
}
// ranges with fixed register requirements should already have a phys register assigned
if (!subrangeItr->list_fixedRegRequirements.empty())
{
cemu_assert_debug(subrangeItr->HasPhysicalRegister());
}
// find free register for current subrangeItr and segment
IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->GetVirtualRegister());
IMLPhysRegisterSet candidatePhysRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat);
cemu_assert_debug(candidatePhysRegSet.HasAnyAvailable()); // no valid pool provided for this register type
IMLPhysRegisterSet allowedRegs = subrangeItr->GetAllowedRegisters(candidatePhysRegSet);
cemu_assert_debug(allowedRegs.HasAnyAvailable()); // if zero regs are available, then this range needs to be split to avoid mismatching register requirements (do this in the initial pass to keep the code here simpler)
candidatePhysRegSet &= allowedRegs;
for (auto& liverangeItr : livenessTimeline.activeRanges)
{
cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0);
candidatePhysRegSet.SetReserved(liverangeItr->GetPhysicalRegister());
}
// check intersections with other ranges and determine allowed registers
IMLPhysRegisterSet localAvailableRegsMask = candidatePhysRegSet; // mask of registers that are currently not used (does not include range checks in other segments)
if (candidatePhysRegSet.HasAnyAvailable())
{
// check for overlaps on a global scale (subrangeItr can be part of a larger range cluster across multiple segments)
PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, candidatePhysRegSet);
}
// some target instructions may enforce specific registers (e.g. common on X86 where something like SHL <reg>, CL forces CL as the count register)
// we determine the list of allowed registers here
// this really only works if we assume single-register requirements (otherwise it's better not to filter out early and instead allow register corrections later, but we don't support this yet)
if (candidatePhysRegSet.HasAnyAvailable())
{
IMLRA_FilterReservedFixedRegisterRequirementsForCluster(ctx, imlSegment, subrangeItr, candidatePhysRegSet);
}
if (candidatePhysRegSet.HasAnyAvailable())
{
// use free register
subrangeItr->SetPhysicalRegisterForCluster(candidatePhysRegSet.GetFirstAvailableReg());
livenessTimeline.AddActiveRange(subrangeItr);
subrangeItr = subrangeItr->link_allSegmentRanges.next; // next
continue;
}
// there is no free register for the entire range
// evaluate different strategies of splitting ranges to free up another register or shorten the current range
strategy.localRangeHoleCutting.Reset();
strategy.availableRegisterHole.Reset();
strategy.explodeRange.Reset();
// can't assign a register
// there might be registers available, we just can't use them due to range conflicts
RASpillStrategy* selectedStrategy = nullptr;
auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy) {
if (newStrategy.GetCost() == INT_MAX)
return;
if (selectedStrategy == nullptr || newStrategy.GetCost() < selectedStrategy->GetCost())
selectedStrategy = &newStrategy;
};
if (!subrangeItr->interval.ExtendsIntoNextSegment())
{
// range ends in current segment, use local strategies
// evaluate strategy: Cut hole into local subrange
strategy.localRangeHoleCutting.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
SelectStrategyIfBetter(strategy.localRangeHoleCutting);
// evaluate strategy: Split current range to fit in available holes
// todo - are checks required to avoid splitting on the suffix instruction?
strategy.availableRegisterHole.Evaluate(imlSegment, subrangeItr, livenessTimeline, localAvailableRegsMask, allowedRegs);
SelectStrategyIfBetter(strategy.availableRegisterHole);
// evaluate strategy: Explode inter-segment ranges
strategy.explodeRange.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
SelectStrategyIfBetter(strategy.explodeRange);
}
else // if subrangeItr->interval.ExtendsIntoNextSegment()
{
strategy.explodeRangeInter.Reset();
strategy.explodeRangeInter.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
SelectStrategyIfBetter(strategy.explodeRangeInter);
}
// choose strategy
if (selectedStrategy)
{
selectedStrategy->Apply(ppcImlGenContext, imlSegment, subrangeItr);
}
else
{
// none of the evaluated strategies can be applied, this should only happen if the range extends into the next segment(s) for which we have no good strategy
cemu_assert_debug(subrangeItr->interval.ExtendsPreviousSegment());
// alternative strategy if we have no other choice: explode current range
IMLRA_ExplodeRangeCluster(ppcImlGenContext, subrangeItr);
}
return false; // ranges were modified by the applied strategy; signal the caller to restart the allocation loop
}
return true;
}
void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext)
{
// process the most frequently executed segments (highest loop depth) first
sint32 maxLoopDepth = 0;
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth);
}
// assign fixed registers first
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
IMLRA_HandleFixedRegisters(ppcImlGenContext, segIt);
#if DEBUG_RA_EXTRA_VALIDATION
// fixed registers are currently handled per-segment, but here we validate that they are assigned correctly on a global scope as well
for (IMLSegment* imlSegment : ppcImlGenContext->segmentList2)
{
for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
{
IMLPhysRegisterSet allowedRegs;
if (!currentRange->GetAllowedRegistersEx(allowedRegs))
{
cemu_assert_debug(currentRange->list_fixedRegRequirements.empty());
continue;
}
cemu_assert_debug(currentRange->HasPhysicalRegister() && allowedRegs.IsAvailable(currentRange->GetPhysicalRegister()));
}
}
#endif
while (true)
{
bool done = false;
for (sint32 d = maxLoopDepth; d >= 0; d--)
{
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
if (segIt->loopDepth != d)
continue;
done = IMLRA_AssignSegmentRegisters(ctx, ppcImlGenContext, segIt);
if (done == false)
break;
}
if (done == false)
break;
}
if (done)
break;
}
}
void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext)
{
// insert empty segments after every non-taken branch if the linked segment has more than one input
// this gives the register allocator more room to create efficient spill code
size_t segmentIndex = 0;
while (segmentIndex < ppcImlGenContext->segmentList2.size())
{
IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex];
if (imlSegment->nextSegmentIsUncertain)
{
segmentIndex++;
continue;
}
if (imlSegment->nextSegmentBranchTaken == nullptr || imlSegment->nextSegmentBranchNotTaken == nullptr)
{
segmentIndex++;
continue;
}
if (imlSegment->nextSegmentBranchNotTaken->list_prevSegments.size() <= 1)
{
segmentIndex++;
continue;
}
if (imlSegment->nextSegmentBranchNotTaken->isEnterable)
{
segmentIndex++;
continue;
}
PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1);
IMLSegment* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0];
IMLSegment* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1];
IMLSegment* nextSegment = imlSegment->nextSegmentBranchNotTaken;
IMLSegment_RemoveLink(imlSegmentP0, nextSegment);
IMLSegment_SetLinkBranchNotTaken(imlSegmentP1, nextSegment);
IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1);
segmentIndex++;
}
// detect loops
for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s];
imlSegment->momentaryIndex = s;
}
for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s];
IMLRA_IdentifyLoop(ppcImlGenContext, imlSegment);
}
}
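// Before/after sketch of the reshaping performed above:
//
//   before:  [S0] --taken-----> [T]          after:  [S0] --taken-----> [T]
//                 --not taken-> [M]                       --not taken-> [S1] --> [M]
//
// where M has more than one predecessor. The empty segment S1 gives the allocator a place to
// emit spill/restore code that runs only on the not-taken path instead of polluting S0 or M.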
IMLRARegAbstractLiveness* _GetAbstractRange(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
{
auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
auto it = segMap.find(regId);
return it != segMap.end() ? &it->second : nullptr;
}
// scan instructions and establish register usage range for segment
void IMLRA_CalculateSegmentMinMaxAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
size_t instructionIndex = 0;
IMLUsedRegisters gprTracking;
auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
while (instructionIndex < imlSegment->imlList.size())
{
imlSegment->imlList[instructionIndex].CheckRegisterUsage(&gprTracking);
gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) {
IMLRegID gprId = gprReg.GetRegID();
auto it = segDistMap.find(gprId);
if (it == segDistMap.end())
{
segDistMap.try_emplace(gprId, gprReg.GetBaseFormat(), (sint32)instructionIndex, (sint32)instructionIndex + 1);
ctx.regIdToBaseFormat.try_emplace(gprId, gprReg.GetBaseFormat());
}
else
{
it->second.TrackInstruction(instructionIndex);
#ifdef CEMU_DEBUG_ASSERT
cemu_assert_debug(ctx.regIdToBaseFormat[gprId] == gprReg.GetBaseFormat()); // the base type per register always has to be the same
#endif
}
});
instructionIndex++;
}
}
void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
{
// for each register calculate min/max index of usage range within each segment
size_t dbgIndex = 0;
for (IMLSegment* segIt : ctx.deprGenContext->segmentList2)
{
cemu_assert_debug(segIt->momentaryIndex == dbgIndex);
IMLRA_CalculateSegmentMinMaxAbstractRanges(ctx, segIt);
dbgIndex++;
}
}
raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID vGPR, IMLName name)
{
IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR);
if (!abstractRange)
return nullptr;
if (abstractRange->isProcessed)
{
// return already existing segment
raLivenessRange* existingRange = IMLRA_GetSubrange(imlSegment, vGPR);
cemu_assert_debug(existingRange);
return existingRange;
}
abstractRange->isProcessed = true;
// create subrange
cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr);
cemu_assert_debug(
(abstractRange->usageStart == abstractRange->usageEnd && (abstractRange->usageStart == RA_INTER_RANGE_START || abstractRange->usageStart == RA_INTER_RANGE_END)) ||
abstractRange->usageStart < abstractRange->usageEnd); // usageEnd is exclusive so it should always be larger
sint32 inclusiveEnd = abstractRange->usageEnd;
if (inclusiveEnd != RA_INTER_RANGE_START && inclusiveEnd != RA_INTER_RANGE_END)
inclusiveEnd--; // subtract one, because usageEnd is exclusive, but the end value of the interval passed to createSubrange is inclusive
raInterval interval;
interval.SetInterval(abstractRange->usageStart, true, inclusiveEnd, true);
raLivenessRange* subrange = IMLRA_CreateRange(ctx.deprGenContext, imlSegment, vGPR, name, interval.start, interval.end);
// traverse forward
if (abstractRange->usageEnd == RA_INTER_RANGE_END)
{
if (imlSegment->nextSegmentBranchTaken)
{
IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR);
if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START)
{
subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name);
subrange->subrangeBranchTaken->previousRanges.push_back(subrange);
cemu_assert_debug(subrange->subrangeBranchTaken->interval.ExtendsPreviousSegment());
}
}
if (imlSegment->nextSegmentBranchNotTaken)
{
IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR);
if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START)
{
subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name);
subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange);
cemu_assert_debug(subrange->subrangeBranchNotTaken->interval.ExtendsPreviousSegment());
}
}
}
// traverse backward
if (abstractRange->usageStart == RA_INTER_RANGE_START)
{
for (auto& it : imlSegment->list_prevSegments)
{
IMLRARegAbstractLiveness* prevRange = _GetAbstractRange(ctx, it, vGPR);
if (!prevRange)
continue;
if (prevRange->usageEnd == RA_INTER_RANGE_END)
PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name);
}
}
return subrange;
}
void IMLRA_UpdateOrAddSubrangeLocation(raLivenessRange* subrange, raInstructionEdge pos)
{
if (subrange->list_accessLocations.empty())
{
subrange->list_accessLocations.emplace_back(pos);
return;
}
if(subrange->list_accessLocations.back().pos == pos)
return;
cemu_assert_debug(subrange->list_accessLocations.back().pos < pos);
subrange->list_accessLocations.emplace_back(pos);
}
// take abstract range data and create LivenessRanges
void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
const std::unordered_map<IMLRegID, raLivenessRange*>& regToSubrange = IMLRA_GetSubrangeMap(imlSegment);
auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet) {
raLivenessRange* subrange = regToSubrange.find(regId)->second;
cemu_assert_debug(subrange);
raFixedRegRequirement tmp;
tmp.pos.Set(instructionIndex, isInput);
tmp.allowedReg = physRegSet;
if (subrange->list_fixedRegRequirements.empty() || subrange->list_fixedRegRequirements.back().pos != tmp.pos)
subrange->list_fixedRegRequirements.push_back(tmp);
};
// convert abstract min-max ranges to liveness range objects
auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
for (auto& it : segMap)
{
if (it.second.isProcessed)
continue;
IMLRegID regId = it.first;
PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, ctx.raParam->regIdToName.find(regId)->second);
}
// fill created ranges with read/write location indices
// note that at this point there is only one range per register per segment
// and the algorithm below relies on this
size_t index = 0;
IMLUsedRegisters gprTracking;
while (index < imlSegment->imlList.size())
{
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
raInstructionEdge pos((sint32)index, true);
gprTracking.ForEachReadGPR([&](IMLReg gprReg) {
IMLRegID gprId = gprReg.GetRegID();
raLivenessRange* subrange = regToSubrange.find(gprId)->second;
IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
});
pos = {(sint32)index, false};
gprTracking.ForEachWrittenGPR([&](IMLReg gprReg) {
IMLRegID gprId = gprReg.GetRegID();
raLivenessRange* subrange = regToSubrange.find(gprId)->second;
IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
});
// check fixed register requirements
IMLFixedRegisters fixedRegs;
GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
for (auto& fixedRegAccess : fixedRegs.listInput)
{
if (fixedRegAccess.reg != IMLREG_INVALID)
AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet);
}
for (auto& fixedRegAccess : fixedRegs.listOutput)
{
if (fixedRegAccess.reg != IMLREG_INVALID)
AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet);
}
index++;
}
}
void IMLRA_extendAbstractRangeToEndOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
{
auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
auto it = segDistMap.find(regId);
if (it == segDistMap.end())
{
sint32 startIndex;
if (imlSegment->HasSuffixInstruction())
startIndex = imlSegment->GetSuffixInstructionIndex();
else
startIndex = RA_INTER_RANGE_END;
segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, startIndex, RA_INTER_RANGE_END);
}
else
{
it->second.usageEnd = RA_INTER_RANGE_END;
}
}
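// extend the abstract range of regId to the inter-segment boundary at the beginning of imlSegment,
// then extend the range to the end of every predecessor segment so liveness stays continuous across the edge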
void IMLRA_extendAbstractRangeToBeginningOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
{
auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
auto it = segDistMap.find(regId);
if (it == segDistMap.end())
{
segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, RA_INTER_RANGE_START, RA_INTER_RANGE_START);
}
else
{
it->second.usageStart = RA_INTER_RANGE_START;
}
// propagate backwards
for (auto& it : imlSegment->list_prevSegments)
{
IMLRA_extendAbstractRangeToEndOfSegment(ctx, it, regId);
}
}
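// make regId live along a route of segments (route[0] .. route[routeDepth-1]): the first segment is
// extended to its end, the last to its beginning, and every segment in between is covered entirely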
void IMLRA_connectAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLRegID regId, IMLSegment** route, sint32 routeDepth)
{
#ifdef CEMU_DEBUG_ASSERT
if (routeDepth < 2)
assert_dbg();
#endif
// extend starting range to end of segment
IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[0], regId);
// extend all the connecting segments in both directions
for (sint32 i = 1; i < (routeDepth - 1); i++)
{
IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[i], regId);
IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[i], regId);
}
// extend the final segment towards the beginning
IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[routeDepth - 1], regId);
}
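// recursively follow control flow (route capped at 64 segments) searching for another abstract range of
// regID within the remaining instruction budget. Segments without a range consume their instruction count
// from the budget; once a close enough range is found, the whole route is connected via IMLRA_connectAbstractRanges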
void _IMLRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRegID regID, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth)
{
if (routeDepth >= 64)
{
cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded");
return;
}
route[routeDepth] = currentSegment;
IMLRARegAbstractLiveness* range = _GetAbstractRange(ctx, currentSegment, regID);
if (!range)
{
// measure distance over entire segment
distanceLeft -= (sint32)currentSegment->imlList.size();
if (distanceLeft > 0)
{
if (currentSegment->nextSegmentBranchNotTaken)
_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, distanceLeft, route, routeDepth + 1);
if (currentSegment->nextSegmentBranchTaken)
_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, distanceLeft, route, routeDepth + 1);
}
return;
}
else
{
// measure distance to range
if (range->usageStart == RA_INTER_RANGE_END)
{
if (distanceLeft < (sint32)currentSegment->imlList.size())
return; // range too far away
}
else if (range->usageStart != RA_INTER_RANGE_START && range->usageStart > distanceLeft)
return; // out of range
// found close range -> connect ranges
IMLRA_connectAbstractRanges(ctx, regID, route, routeDepth + 1);
}
}
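// if the range ends inside currentSegment, check whether the register is used again within a small scan
// window further down the control flow and, if so, merge the ranges so the register stays allocated in
// between. The 45 instruction window appears to be a tuned heuristic balancing register pressure against
// redundant store/load pairs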
void PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRARegAbstractLiveness* range, IMLRegID regID)
{
cemu_assert_debug(range->usageEnd >= 0);
// count instructions to end of initial segment
sint32 instructionsUntilEndOfSeg;
if (range->usageEnd == RA_INTER_RANGE_END)
instructionsUntilEndOfSeg = 0;
else
instructionsUntilEndOfSeg = (sint32)currentSegment->imlList.size() - range->usageEnd;
cemu_assert_debug(instructionsUntilEndOfSeg >= 0);
sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg;
if (remainingScanDist <= 0)
return; // can't reach end
IMLSegment* route[64];
route[0] = currentSegment;
if (currentSegment->nextSegmentBranchNotTaken)
_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, remainingScanDist, route, 1);
if (currentSegment->nextSegmentBranchTaken)
_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, remainingScanDist, route, 1);
}
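// try the close-range merge for every abstract range that lives in this segment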
void PPCRecRA_mergeCloseRangesForSegmentV2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
for (auto& it : segMap)
{
PPCRecRA_checkAndTryExtendRange(ctx, imlSegment, &(it.second), it.first);
}
#ifdef CEMU_DEBUG_ASSERT
if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
assert_dbg();
if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain)
assert_dbg();
#endif
}
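// breadth-first traversal over all segments reachable from imlSegment, applying the close-range merge to each segment exactly once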
void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
std::vector<IMLSegment*> list_segments;
std::vector<bool> list_processedSegment;
size_t segmentCount = ctx.deprGenContext->segmentList2.size();
list_segments.reserve(segmentCount + 1);
list_processedSegment.resize(segmentCount);
auto markSegProcessed = [&list_processedSegment](IMLSegment* seg) {
list_processedSegment[seg->momentaryIndex] = true;
};
auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool {
return list_processedSegment[seg->momentaryIndex];
};
markSegProcessed(imlSegment);
sint32 index = 0;
list_segments.push_back(imlSegment);
while (index < list_segments.size())
{
IMLSegment* currentSegment = list_segments[index];
PPCRecRA_mergeCloseRangesForSegmentV2(ctx, currentSegment);
// follow flow
if (currentSegment->nextSegmentBranchNotTaken && !isSegProcessed(currentSegment->nextSegmentBranchNotTaken))
{
markSegProcessed(currentSegment->nextSegmentBranchNotTaken);
list_segments.push_back(currentSegment->nextSegmentBranchNotTaken);
}
if (currentSegment->nextSegmentBranchTaken && !isSegProcessed(currentSegment->nextSegmentBranchTaken))
{
markSegProcessed(currentSegment->nextSegmentBranchTaken);
list_segments.push_back(currentSegment->nextSegmentBranchTaken);
}
index++;
}
}
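// run the merge pass starting from every entry/standalone segment (segments without predecessors);
// all other segments are visited by the flow traversal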
void IMLRA_MergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx)
{
for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
if (!imlSegment->list_prevSegments.empty())
continue; // not an entry/standalone segment
PPCRecRA_followFlowAndExtendRanges(ctx, imlSegment);
}
}
void IMLRA_ExtendAbstractRangesOutOfLoops(IMLRegisterAllocatorContext& ctx)
{
for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
auto localLoopDepth = imlSegment->loopDepth;
if (localLoopDepth <= 0)
continue; // not inside a loop
// look for loop exit
bool hasLoopExit = false;
if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth)
{
hasLoopExit = true;
}
if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth)
{
hasLoopExit = true;
}
if (hasLoopExit == false)
continue;
// extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop)
auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
for (auto& it : segMap)
{
if (it.second.usageEnd != RA_INTER_RANGE_END)
continue;
if (imlSegment->nextSegmentBranchTaken)
IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchTaken, it.first);
if (imlSegment->nextSegmentBranchNotTaken)
IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchNotTaken, it.first);
}
}
}
void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
{
IMLRA_MergeCloseAbstractRanges(ctx);
// extra pass to move register loads and stores out of loops
IMLRA_ExtendAbstractRangesOutOfLoops(ctx);
// calculate liveness ranges
for (auto& segIt : ctx.deprGenContext->segmentList2)
IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt);
}
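// derive load/store requirements for a single range from its access pattern: a range whose first access
// is a write never needs a load (_noLoad), a range with any write needs a store (hasStore), and ranges
// extending in from a previous segment already hold their value in a register, so they skip the load as well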
void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange)
{
bool isRead = false;
bool isWritten = false;
bool isOverwritten = false;
for (auto& location : subrange->list_accessLocations)
{
if (location.IsRead())
{
isRead = true;
}
if (location.IsWrite())
{
if (isRead == false)
isOverwritten = true;
isWritten = true;
}
}
subrange->_noLoad = isOverwritten;
subrange->hasStore = isWritten;
if (subrange->interval.ExtendsPreviousSegment())
subrange->_noLoad = true;
}
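// collects the subranges in which a live interval, followed across segment boundaries, finally ends.
// traversal is deduplicated via the iteration index and reports hasUndefinedEndings when the depth limit
// or list capacity is exceeded, or when control flow leaves the tracked ranges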
struct subrangeEndingInfo_t
{
raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE];
sint32 subrangeCount;
bool hasUndefinedEndings;
};
void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info)
{
if (depth >= 30)
{
info->hasUndefinedEndings = true;
return;
}
if (subrange->lastIterationIndex == iterationIndex)
return; // already processed
subrange->lastIterationIndex = iterationIndex;
if (subrange->hasStoreDelayed)
return; // no need to traverse this subrange
IMLSegment* imlSegment = subrange->imlSegment;
if (!subrange->interval.ExtendsIntoNextSegment())
{
// ending segment
if (info->subrangeCount >= SUBRANGE_LIST_SIZE)
{
info->hasUndefinedEndings = true;
return;
}
else
{
info->subrangeList[info->subrangeCount] = subrange;
info->subrangeCount++;
}
return;
}
// traverse next subranges in flow
if (imlSegment->nextSegmentBranchNotTaken)
{
if (subrange->subrangeBranchNotTaken == nullptr)
{
info->hasUndefinedEndings = true;
}
else
{
_findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, iterationIndex, depth + 1, info);
}
}
if (imlSegment->nextSegmentBranchTaken)
{
if (subrange->subrangeBranchTaken == nullptr)
{
info->hasUndefinedEndings = true;
}
else
{
_findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info);
}
}
}
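// store-sinking: if a stored range extends into following segments, try to delay the store into the
// segments where the interval ends. The store is dropped here when every ending already stores anyway,
// or moved to the endings when the worst-case cost there does not exceed the cost of storing in this segment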
static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange)
{
if (!subrange->interval.ExtendsIntoNextSegment())
return;
// analyze data flow across segments (if this segment has writes)
if (subrange->hasStore)
{
subrangeEndingInfo_t writeEndingInfo;
writeEndingInfo.subrangeCount = 0;
writeEndingInfo.hasUndefinedEndings = false;
_findSubrangeWriteEndings(subrange, IMLRA_GetNextIterationIndex(), 0, &writeEndingInfo);
if (writeEndingInfo.hasUndefinedEndings == false)
{
// get cost of delaying store into endings
sint32 delayStoreCost = 0;
bool alreadyStoredInAllEndings = true;
for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
{
raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i];
if (subrangeItr->hasStore)
continue; // this ending already stores, no extra cost
alreadyStoredInAllEndings = false;
sint32 storeCost = IMLRA_GetSegmentReadWriteCost(subrangeItr->imlSegment);
delayStoreCost = std::max(storeCost, delayStoreCost);
}
if (alreadyStoredInAllEndings)
{
subrange->hasStore = false;
subrange->hasStoreDelayed = true;
}
else if (delayStoreCost <= IMLRA_GetSegmentReadWriteCost(subrange->imlSegment))
{
subrange->hasStore = false;
subrange->hasStoreDelayed = true;
for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
{
raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i];
subrangeItr->hasStore = true;
}
}
}
}
}
void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext)
{
// this function is called after _AssignRegisters(), which means that all liveness ranges are already final and must not be modified anymore
// track read/write dependencies per segment
for (auto& seg : ppcImlGenContext->segmentList2)
{
raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges;
while (subrange)
{
IMLRA_AnalyzeSubrangeDataDependency(subrange);
subrange = subrange->link_allSegmentRanges.next;
}
}
// propagate information across segment boundaries
for (auto& seg : ppcImlGenContext->segmentList2)
{
raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges;
while (subrange)
{
IMLRA_AnalyzeRangeDataFlow(subrange);
subrange = subrange->link_allSegmentRanges.next;
}
}
}
/* Generate move instructions */
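// build an IMLReg that names a register id directly; used below to emit moves that reference the assigned physical registers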
inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId)
{
return IMLReg(baseFormat, baseFormat, 0, regId);
}
// prepass for IMLRA_GenerateSegmentMoveInstructions2 which rewrites all virtual registers to their assigned physical counterparts
void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
std::unordered_map<IMLRegID, IMLRegID> virtId2PhysReg;
boost::container::small_vector<raLivenessRange*, 64> activeRanges;
raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
raInstructionEdge currentEdge;
for (size_t i = 0; i < imlSegment->imlList.size(); i++)
{
currentEdge.Set(i, false); // set to instruction index on output edge
// activate ranges which begin before or during this instruction
while (currentRange && currentRange->interval.start <= currentEdge)
{
cemu_assert_debug(virtId2PhysReg.find(currentRange->GetVirtualRegister()) == virtId2PhysReg.end() || virtId2PhysReg[currentRange->GetVirtualRegister()] == currentRange->GetPhysicalRegister()); // check for register conflict
virtId2PhysReg[currentRange->GetVirtualRegister()] = currentRange->GetPhysicalRegister();
activeRanges.push_back(currentRange);
currentRange = currentRange->link_allSegmentRanges.next;
}
// rewrite registers
imlSegment->imlList[i].RewriteGPR(virtId2PhysReg);
// deactivate ranges which end during this instruction
auto it = activeRanges.begin();
while (it != activeRanges.end())
{
if ((*it)->interval.end <= currentEdge)
{
virtId2PhysReg.erase((*it)->GetVirtualRegister());
it = activeRanges.erase(it);
}
else
++it;
}
}
}
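// rebuild the segment's instruction list, interleaving the original instructions with load (make_r_name)
// and store (make_name_r) moves at the edges where liveness ranges begin and end. Registers are rewritten
// to their physical counterparts first; ranges arriving from the previous segment are activated up front,
// and the suffix instruction, if present, must remain the last instruction of the segment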
void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
IMLRA_RewriteRegisters(ctx, imlSegment);
#if DEBUG_RA_INSTRUCTION_GEN
cemuLog_log(LogType::Force, "");
cemuLog_log(LogType::Force, "[Seg before RA]");
IMLDebug_DumpSegment(nullptr, imlSegment, true);
#endif
bool hadSuffixInstruction = imlSegment->HasSuffixInstruction();
std::vector<IMLInstruction> rebuiltInstructions;
sint32 numInstructionsWithoutSuffix = (sint32)imlSegment->imlList.size() - (imlSegment->HasSuffixInstruction() ? 1 : 0);
if (imlSegment->imlList.empty())
{
// empty segments need special handling (todo - look into merging this with the core logic below eventually)
// store all ranges
raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
while (currentRange)
{
if (currentRange->hasStore)
rebuiltInstructions.emplace_back().make_name_r(currentRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()));
currentRange = currentRange->link_allSegmentRanges.next;
}
// load ranges
currentRange = imlSegment->raInfo.linkedList_allSubranges;
while (currentRange)
{
if (!currentRange->_noLoad)
{
cemu_assert_debug(currentRange->interval.ExtendsIntoNextSegment());
rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
}
currentRange = currentRange->link_allSegmentRanges.next;
}
imlSegment->imlList = std::move(rebuiltInstructions);
return;
}
// make sure that no range exceeds the suffix instruction input edge, except ranges that need to be loaded for the next segment (todo - for those, set the start point accordingly?)
{
raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
raInstructionEdge edge;
if (imlSegment->HasSuffixInstruction())
edge.Set(numInstructionsWithoutSuffix, true);
else
edge.Set(numInstructionsWithoutSuffix - 1, false);
while (currentRange)
{
if (!currentRange->interval.IsNextSegmentOnly() && currentRange->interval.end > edge)
{
currentRange->interval.SetEnd(edge);
}
currentRange = currentRange->link_allSegmentRanges.next;
}
}
#if DEBUG_RA_INSTRUCTION_GEN
cemuLog_log(LogType::Force, "");
cemuLog_log(LogType::Force, "--- Intermediate liveness info ---");
{
raLivenessRange* dbgRange = imlSegment->raInfo.linkedList_allSubranges;
while (dbgRange)
{
cemuLog_log(LogType::Force, "Range i{}: {}-{}", dbgRange->GetVirtualRegister(), dbgRange->interval2.start.GetDebugString(), dbgRange->interval2.end.GetDebugString());
dbgRange = dbgRange->link_allSegmentRanges.next;
}
}
#endif
boost::container::small_vector<raLivenessRange*, 64> activeRanges;
// first, activate all the ranges that extend in from the previous segment; some of these end immediately at the first instruction, so we may need to store them early
raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
// make all ranges active that start on RA_INTER_RANGE_START
while (currentRange && currentRange->interval.start.ConnectsToPreviousSegment())
{
activeRanges.push_back(currentRange);
currentRange = currentRange->link_allSegmentRanges.next;
}
// store all ranges that end before the first output edge (includes RA_INTER_RANGE_START)
auto it = activeRanges.begin();
raInstructionEdge firstOutputEdge;
firstOutputEdge.Set(0, false);
while (it != activeRanges.end())
{
if ((*it)->interval.end < firstOutputEdge)
{
raLivenessRange* storedRange = *it;
if (storedRange->hasStore)
rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister()));
it = activeRanges.erase(it);
continue;
}
++it;
}
sint32 numInstructions = (sint32)imlSegment->imlList.size();
for (sint32 i = 0; i < numInstructions; i++)
{
raInstructionEdge curEdge;
// input edge
curEdge.SetRaw(i * 2 + 1); // +1 to include ranges that start at the output of the instruction
while (currentRange && currentRange->interval.start <= curEdge)
{
if (!currentRange->_noLoad)
{
rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
}
activeRanges.push_back(currentRange);
currentRange = currentRange->link_allSegmentRanges.next;
}
// copy instruction
rebuiltInstructions.push_back(imlSegment->imlList[i]);
// output edge
curEdge.SetRaw(i * 2 + 1 + 1);
// also store ranges that end on the next input edge; this is handled by the extra +1 above
auto it = activeRanges.begin();
while (it != activeRanges.end())
{
if ((*it)->interval.end <= curEdge)
{
// range expires
// todo - check hasStore
raLivenessRange* storedRange = *it;
if (storedRange->hasStore)
{
cemu_assert_debug(i != numInstructionsWithoutSuffix); // not allowed to emit after suffix
rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister()));
}
it = activeRanges.erase(it);
continue;
}
++it;
}
}
// if there is no suffix instruction we currently need to handle the final loads here
cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
if (imlSegment->HasSuffixInstruction())
{
cemu_assert_debug(!currentRange); // no range may begin after the suffix instruction
for (auto& remainingRange : activeRanges)
{
cemu_assert_debug(!remainingRange->hasStore);
}
}
else
{
for (auto& remainingRange : activeRanges)
{
cemu_assert_debug(!remainingRange->hasStore); // this range still needs to be stored
}
while (currentRange)
{
cemu_assert_debug(currentRange->interval.IsNextSegmentOnly());
cemu_assert_debug(!currentRange->_noLoad);
rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
currentRange = currentRange->link_allSegmentRanges.next;
}
}
imlSegment->imlList = std::move(rebuiltInstructions);
cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
#if DEBUG_RA_INSTRUCTION_GEN
cemuLog_log(LogType::Force, "");
cemuLog_log(LogType::Force, "[Seg after RA]");
IMLDebug_DumpSegment(nullptr, imlSegment, false);
#endif
}
void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx)
{
for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
IMLRA_GenerateSegmentMoveInstructions2(ctx, imlSegment);
}
}
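// debug-only: verify that every range overlapping a fixed register requirement either is the requesting
// register (and was assigned one of the allowed physical registers) or stays out of the reserved register set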
static void DbgVerifyFixedRegRequirements(IMLSegment* imlSegment)
{
#if DEBUG_RA_EXTRA_VALIDATION
std::vector<raFixedRegRequirementWithVGPR> frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment);
for(auto& fixedReq : frr)
{
for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
{
if (!range->interval.ContainsEdge(fixedReq.pos))
continue;
// verify if the requirement is compatible
if(range->GetVirtualRegister() == fixedReq.regId)
{
cemu_assert(range->HasPhysicalRegister());
cemu_assert(fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register matches, but not assigned the right physical register
}
else
{
cemu_assert(!fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register does not match, but using the reserved physical register
}
}
}
#endif
}
static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx)
{
#if DEBUG_RA_EXTRA_VALIDATION
for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr)
{
PPCRecRA_debugValidateSubrange(subrangeItr);
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
}
// check that no range violates register requirements
for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
{
DbgVerifyFixedRegRequirements(ctx.deprGenContext->segmentList2[s]);
}
#endif
}
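// top-level driver for register allocation:
// 1) reshape the IML for allocation and refresh segment indices
// 2) compute per-segment abstract liveness, merge/extend ranges across control flow, convert them to liveness ranges
// 3) assign physical registers
// 4) analyze data flow to decide where loads and stores are needed
// 5) rewrite registers and emit the move instructions
// 6) free all range objects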
void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam)
{
IMLRegisterAllocatorContext ctx;
ctx.raParam = &raParam;
ctx.deprGenContext = ppcImlGenContext;
IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext);
ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment
ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size());
IMLRA_CalculateLivenessRanges(ctx);
IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx);
IMLRA_AssignRegisters(ctx, ppcImlGenContext);
DbgVerifyAllRanges(ctx);
IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext);
IMLRA_GenerateMoveInstructions(ctx);
IMLRA_DeleteAllRanges(ppcImlGenContext);
}