mirror of https://github.com/cemu-project/Cemu.git
synced 2025-12-11 16:37:02 +00:00
#include "IML.h"
|
|
|
|
#include "../PPCRecompiler.h"
|
|
#include "../PPCRecompilerIml.h"
|
|
#include "IMLRegisterAllocator.h"
|
|
#include "IMLRegisterAllocatorRanges.h"
|
|
|
|
#include "../BackendX64/BackendX64.h"
|
|
|
|
#include <boost/container/static_vector.hpp>
|
|
#include <boost/container/small_vector.hpp>
|
|
|
|
#include "Common/cpu_features.h"
|
|
|
|
#define DEBUG_RA_EXTRA_VALIDATION 0 // if set to non-zero, additional expensive validation checks will be performed
|
|
#define DEBUG_RA_INSTRUCTION_GEN 0
|
|
|
|
struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment
{
    IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd)
        : regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {};

    void TrackInstruction(sint32 index)
    {
        usageStart = std::min<sint32>(usageStart, index);
        usageEnd = std::max<sint32>(usageEnd, index + 1); // exclusive index
    }

    sint32 usageStart;
    sint32 usageEnd;
    bool isProcessed{false};
    IMLRegFormat regBaseFormat;
};
struct IMLRegisterAllocatorContext
{
    IMLRegisterAllocatorParameters* raParam;
    ppcImlGenContext_t* deprGenContext; // deprecated. Try to decouple IMLRA from other parts of IML/PPCRec

    std::unordered_map<IMLRegID, IMLRegFormat> regIdToBaseFormat;
    // first pass
    std::vector<std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>> perSegmentAbstractRanges;

    // helper methods
    inline std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>& GetSegmentAbstractRangeMap(IMLSegment* imlSegment)
    {
        return perSegmentAbstractRanges[imlSegment->momentaryIndex];
    }

    inline IMLRegFormat GetBaseFormatByRegId(IMLRegID regId) const
    {
        auto it = regIdToBaseFormat.find(regId);
        cemu_assert_debug(it != regIdToBaseFormat.cend());
        return it->second;
    }
};
struct IMLFixedRegisters
{
    struct Entry
    {
        Entry(IMLReg reg, IMLPhysRegisterSet physRegSet)
            : reg(reg), physRegSet(physRegSet) {}

        IMLReg reg;
        IMLPhysRegisterSet physRegSet;
    };
    boost::container::small_vector<Entry, 4> listInput;  // fixed register requirements for instruction input edge
    boost::container::small_vector<Entry, 4> listOutput; // fixed register requirements for instruction output edge
};
static void SetupCallingConvention(const IMLInstruction* instruction, IMLFixedRegisters& fixedRegs, const IMLPhysReg intParamToPhysReg[3], const IMLPhysReg floatParamToPhysReg[3], const IMLPhysReg intReturnPhysReg, const IMLPhysReg floatReturnPhysReg, IMLPhysRegisterSet volatileRegisters)
{
    sint32 numIntParams = 0, numFloatParams = 0;

    auto AddParameterMapping = [&](IMLReg reg) {
        if (!reg.IsValid())
            return;
        if (reg.GetBaseFormat() == IMLRegFormat::I64)
        {
            IMLPhysRegisterSet ps;
            ps.SetAvailable(intParamToPhysReg[numIntParams]);
            fixedRegs.listInput.emplace_back(reg, ps);
            numIntParams++;
        }
        else if (reg.GetBaseFormat() == IMLRegFormat::F64)
        {
            IMLPhysRegisterSet ps;
            ps.SetAvailable(floatParamToPhysReg[numFloatParams]);
            fixedRegs.listInput.emplace_back(reg, ps);
            numFloatParams++;
        }
        else
        {
            cemu_assert_suspicious();
        }
    };
    AddParameterMapping(instruction->op_call_imm.regParam0);
    AddParameterMapping(instruction->op_call_imm.regParam1);
    AddParameterMapping(instruction->op_call_imm.regParam2);
    // return value
    if (instruction->op_call_imm.regReturn.IsValid())
    {
        IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat();
        bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8;
        IMLPhysRegisterSet ps;
        if (isIntegerFormat)
        {
            ps.SetAvailable(intReturnPhysReg);
            volatileRegisters.SetReserved(intReturnPhysReg);
        }
        else
        {
            ps.SetAvailable(floatReturnPhysReg);
            volatileRegisters.SetReserved(floatReturnPhysReg);
        }
        fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps);
    }
    // block volatile registers from being used on the output edge, this makes the register allocator store them during the call
    fixedRegs.listOutput.emplace_back(IMLREG_INVALID, volatileRegisters);
}
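// Example (x86-64 path below): for a call_imm with regParam0 of type I64 and
// regParam1 of type F64, AddParameterMapping pins regParam0 to the first integer
// slot (RCX) and regParam1 to the *second* FPR slot, because the integer and float
// parameter counters advance independently per register class. The concrete
// registers are always supplied by the caller via intParamToPhysReg/floatParamToPhysReg,
// so this matches a Win64-style convention only when those arrays say so.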
#if defined(__aarch64__)
// aarch64
static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
{
    fixedRegs.listInput.clear();
    fixedRegs.listOutput.clear();

    // code below for aarch64 has not been tested
    // The purpose of GetInstructionFixedRegisters() is to constrain virtual registers to specific physical registers for instructions which need it
    // on x86 this is used for instructions like SHL <reg>, CL where the CL register is hardwired. On aarch64 it's probably only necessary for setting up the calling convention
    cemu_assert_unimplemented();
#if 0
    if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
    {
        const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2};
        const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2};
        IMLPhysRegisterSet volatileRegs;
        for (int i = 0; i < 19; i++) // x0 to x18 are volatile
            volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_GPR_BASE + i);
        for (int i = 0; i <= 31; i++) // which float registers are volatile?
            volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i);
        SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, volatileRegs);
    }
#endif
}
#else
// x86-64
static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
{
    fixedRegs.listInput.clear();
    fixedRegs.listOutput.clear();

    if (instruction->type == PPCREC_IML_TYPE_R_R_R)
    {
        if (instruction->operation == PPCREC_IML_OP_LEFT_SHIFT || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
        {
            if (!g_CPUFeatures.x86.bmi2)
            {
                IMLPhysRegisterSet ps;
                ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_ECX);
                fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ps);
            }
        }
    }
    else if (instruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
    {
        IMLPhysRegisterSet ps;
        ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX);
        fixedRegs.listInput.emplace_back(IMLREG_INVALID, ps); // none of the inputs may use EAX
        fixedRegs.listOutput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); // but we output to EAX
    }
    else if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
    {
        const IMLPhysReg intParamToPhysReg[3] = {IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8};
        const IMLPhysReg floatParamToPhysReg[3] = {IMLArchX86::PHYSREG_FPR_BASE + 0, IMLArchX86::PHYSREG_FPR_BASE + 1, IMLArchX86::PHYSREG_FPR_BASE + 2};
        IMLPhysRegisterSet volatileRegs;
        volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
        volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX);
        volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
        volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8);
        volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9);
        volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10);
        volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11);
        // YMM0-YMM5 are volatile
        for (int i = 0; i <= 5; i++)
            volatileRegs.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + i);
        // for YMM6-YMM15 only the upper 128 bits are volatile, which we don't use
        SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX, IMLArchX86::PHYSREG_FPR_BASE + 0, volatileRegs);
    }
}
#endif
uint32 IMLRA_GetNextIterationIndex()
{
    static uint32 recRACurrentIterationIndex = 0;
    recRACurrentIterationIndex++;
    return recRACurrentIterationIndex;
}
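// Note: the iteration index acts as a visitation marker. Each loop-detection pass
// grabs a fresh index and stamps it into raInfo.lastIterationIndex, which lets
// _detectLoop() recognize already-visited segments without having to clear any
// per-segment flags between passes.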
bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex, IMLSegment* imlSegmentLoopBase)
{
    if (currentSegment == imlSegmentLoopBase)
        return true;
    if (currentSegment->raInfo.lastIterationIndex == iterationIndex)
        return currentSegment->raInfo.isPartOfProcessedLoop;
    if (depth >= 9)
        return false;
    currentSegment->raInfo.lastIterationIndex = iterationIndex;
    currentSegment->raInfo.isPartOfProcessedLoop = false;

    if (currentSegment->nextSegmentIsUncertain)
        return false;
    if (currentSegment->nextSegmentBranchNotTaken)
    {
        if (currentSegment->nextSegmentBranchNotTaken->momentaryIndex > currentSegment->momentaryIndex)
        {
            currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
        }
    }
    if (currentSegment->nextSegmentBranchTaken)
    {
        if (currentSegment->nextSegmentBranchTaken->momentaryIndex > currentSegment->momentaryIndex)
        {
            currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
        }
    }
    if (currentSegment->raInfo.isPartOfProcessedLoop)
        currentSegment->loopDepth++;
    return currentSegment->raInfo.isPartOfProcessedLoop;
}
void IMLRA_DetectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegmentLoopBase)
{
    uint32 iterationIndex = IMLRA_GetNextIterationIndex();
    imlSegmentLoopBase->raInfo.lastIterationIndex = iterationIndex;
    if (_detectLoop(imlSegmentLoopBase->nextSegmentBranchTaken, 0, iterationIndex, imlSegmentLoopBase))
    {
        imlSegmentLoopBase->loopDepth++;
    }
}
void IMLRA_IdentifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
    if (imlSegment->nextSegmentIsUncertain)
        return;
    // check if this segment has a branch that links to itself (tight loop)
    if (imlSegment->nextSegmentBranchTaken == imlSegment)
    {
        // segment loops over itself
        imlSegment->loopDepth++;
        return;
    }
    // check if this segment has a branch that goes backwards (potential complex loop)
    if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->momentaryIndex < imlSegment->momentaryIndex)
    {
        IMLRA_DetectLoop(ppcImlGenContext, imlSegment);
    }
}
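// The loopDepth accumulated here is consumed later by IMLRA_AssignRegisters(),
// which processes segments with the highest loop depth first so that frequently
// executed code gets first pick of the physical registers.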
#define SUBRANGE_LIST_SIZE (128)

sint32 IMLRA_CountDistanceUntilNextUse(raLivenessRange* subrange, raInstructionEdge startPosition)
{
    for (sint32 i = 0; i < subrange->list_accessLocations.size(); i++)
    {
        if (subrange->list_accessLocations[i].pos >= startPosition)
        {
            auto& it = subrange->list_accessLocations[i];
            cemu_assert_debug(it.IsRead() != it.IsWrite()); // an access location can be either read or write
            cemu_assert_debug(!startPosition.ConnectsToPreviousSegment() && !startPosition.ConnectsToNextSegment());
            return it.pos.GetRaw() - startPosition.GetRaw();
        }
    }
    cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000);
    return 10001 * 2;
}
// returns the distance to the next fixed register requirement for physRegister on or after startPosition (setting hasFixedAccess); if there is none, returns the distance to the end of the range
sint32 IMLRA_CountDistanceUntilFixedRegUsageInRange(IMLSegment* imlSegment, raLivenessRange* range, raInstructionEdge startPosition, sint32 physRegister, bool& hasFixedAccess)
{
    hasFixedAccess = false;
    cemu_assert_debug(startPosition.IsInstructionIndex());
    for (auto& fixedReqEntry : range->list_fixedRegRequirements)
    {
        if (fixedReqEntry.pos < startPosition)
            continue;
        if (fixedReqEntry.allowedReg.IsAvailable(physRegister))
        {
            hasFixedAccess = true;
            return fixedReqEntry.pos.GetRaw() - startPosition.GetRaw();
        }
    }
    cemu_assert_debug(range->interval.end.IsInstructionIndex());
    return range->interval.end.GetRaw() - startPosition.GetRaw();
}
sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 maxDistance, IMLRegID ourRegId, sint32 physRegister)
{
    cemu_assert_debug(startPosition.IsInstructionIndex());
    raInstructionEdge lastPos2;
    lastPos2.Set(imlSegment->imlList.size(), false);

    raInstructionEdge endPos;
    endPos = startPosition + maxDistance;
    if (endPos > lastPos2)
        endPos = lastPos2;
    IMLFixedRegisters fixedRegs;
    if (startPosition.IsOnOutputEdge())
        GetInstructionFixedRegisters(imlSegment->imlList.data() + startPosition.GetInstructionIndex(), fixedRegs);
    for (raInstructionEdge currentPos = startPosition; currentPos <= endPos; ++currentPos)
    {
        if (currentPos.IsOnInputEdge())
        {
            GetInstructionFixedRegisters(imlSegment->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs);
        }
        auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
        for (auto& fixedRegLoc : fixedRegAccess)
        {
            if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
            {
                cemu_assert_debug(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider. Except for IMLREG_INVALID which is used to indicate reserved registers
                if (fixedRegLoc.physRegSet.IsAvailable(physRegister))
                    return currentPos.GetRaw() - startPosition.GetRaw();
            }
        }
    }
    return endPos.GetRaw() - startPosition.GetRaw();
}
// count how many instructions there are until physRegister is used by any subrange or reserved for any fixed register requirement (returns 0 if register is in use at startIndex)
sint32 PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 physRegister)
{
    cemu_assert_debug(startPosition.IsInstructionIndex());
    sint32 minDistance = (sint32)imlSegment->imlList.size() * 2 - startPosition.GetRaw();
    // next
    raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
    while (subrangeItr)
    {
        if (subrangeItr->GetPhysicalRegister() != physRegister)
        {
            subrangeItr = subrangeItr->link_allSegmentRanges.next;
            continue;
        }
        if (subrangeItr->interval.ContainsEdge(startPosition))
            return 0;
        if (subrangeItr->interval.end < startPosition)
        {
            subrangeItr = subrangeItr->link_allSegmentRanges.next;
            continue;
        }
        cemu_assert_debug(startPosition <= subrangeItr->interval.start);
        sint32 currentDist = subrangeItr->interval.start.GetRaw() - startPosition.GetRaw();
        minDistance = std::min(minDistance, currentDist);
        subrangeItr = subrangeItr->link_allSegmentRanges.next;
    }
    return minDistance;
}
struct IMLRALivenessTimeline
{
    IMLRALivenessTimeline()
    {
    }

    // manually add an active range
    void AddActiveRange(raLivenessRange* subrange)
    {
        activeRanges.emplace_back(subrange);
    }

    void ExpireRanges(raInstructionEdge expireUpTo)
    {
        expiredRanges.clear();
        size_t count = activeRanges.size();
        for (size_t f = 0; f < count; f++)
        {
            raLivenessRange* liverange = activeRanges[f];
            if (liverange->interval.end < expireUpTo) // this was <= but since end is not inclusive we need to use <
            {
#ifdef CEMU_DEBUG_ASSERT
                if (!expireUpTo.ConnectsToNextSegment() && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken))
                    assert_dbg(); // infinite subranges should not expire
#endif
                expiredRanges.emplace_back(liverange);
                // remove entry
                activeRanges[f] = activeRanges[count - 1];
                f--;
                count--;
            }
        }
        if (count != activeRanges.size())
            activeRanges.resize(count);
    }

    std::span<raLivenessRange*> GetExpiredRanges()
    {
        return {expiredRanges.data(), expiredRanges.size()};
    }

    std::span<raLivenessRange*> GetActiveRanges()
    {
        return {activeRanges.data(), activeRanges.size()};
    }

    raLivenessRange* GetActiveRangeByVirtualRegId(IMLRegID regId)
    {
        for (auto& it : activeRanges)
            if (it->virtualRegister == regId)
                return it;
        return nullptr;
    }

    raLivenessRange* GetActiveRangeByPhysicalReg(sint32 physReg)
    {
        cemu_assert_debug(physReg >= 0);
        for (auto& it : activeRanges)
            if (it->physicalRegister == physReg)
                return it;
        return nullptr;
    }

    boost::container::small_vector<raLivenessRange*, 64> activeRanges;

private:
    boost::container::small_vector<raLivenessRange*, 16> expiredRanges;
};
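// Usage sketch (see IMLRA_AssignSegmentRegisters below): ranges are visited in
// ascending start order; for each one the caller first calls
// ExpireRanges(range->interval.start) to retire ranges that ended before this
// point, then AddActiveRange() once a physical register has been chosen. At any
// moment activeRanges thus holds exactly the ranges overlapping the scan position.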
// mark registers occupied by any overlapping range as unavailable in physRegSet
void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet)
{
    auto clusterRanges = range2->GetAllSubrangesInCluster();
    for (auto& subrange : clusterRanges)
    {
        IMLSegment* imlSegment = subrange->imlSegment;
        raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
        while (subrangeItr)
        {
            if (subrange == subrangeItr)
            {
                // next
                subrangeItr = subrangeItr->link_allSegmentRanges.next;
                continue;
            }
            if (subrange->interval.IsOverlapping(subrangeItr->interval))
            {
                if (subrangeItr->GetPhysicalRegister() >= 0)
                    physRegSet.SetReserved(subrangeItr->GetPhysicalRegister());
            }
            // next
            subrangeItr = subrangeItr->link_allSegmentRanges.next;
        }
    }
}
bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs)
{
    return lhs->interval.start < rhs->interval.start;
}
void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
{
    raLivenessRange* subrangeList[4096 + 1];
    sint32 count = 0;
    // disassemble linked list
    raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
    while (subrangeItr)
    {
        cemu_assert(count < 4096);
        subrangeList[count] = subrangeItr;
        count++;
        // next
        subrangeItr = subrangeItr->link_allSegmentRanges.next;
    }
    if (count == 0)
    {
        imlSegment->raInfo.linkedList_allSubranges = nullptr;
        return;
    }
    // sort
    std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare);
    // reassemble linked list
    subrangeList[count] = nullptr;
    imlSegment->raInfo.linkedList_allSubranges = subrangeList[0];
    subrangeList[0]->link_allSegmentRanges.prev = nullptr;
    subrangeList[0]->link_allSegmentRanges.next = subrangeList[1];
    for (sint32 i = 1; i < count; i++)
    {
        subrangeList[i]->link_allSegmentRanges.prev = subrangeList[i - 1];
        subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1];
    }
    // validate list
#if DEBUG_RA_EXTRA_VALIDATION
    sint32 count2 = 0;
    subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
    raInstructionEdge currentStartPosition;
    currentStartPosition.SetRaw(RA_INTER_RANGE_START);
    while (subrangeItr)
    {
        count2++;
        if (subrangeItr->interval.start < currentStartPosition)
            assert_dbg();
        currentStartPosition = subrangeItr->interval.start;
        // next
        subrangeItr = subrangeItr->link_allSegmentRanges.next;
    }
    if (count != count2)
        assert_dbg();
#endif
}
std::unordered_map<IMLRegID, raLivenessRange*>& IMLRA_GetSubrangeMap(IMLSegment* imlSegment)
{
    return imlSegment->raInfo.linkedList_perVirtualRegister;
}

raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId)
{
    auto it = imlSegment->raInfo.linkedList_perVirtualRegister.find(regId);
    if (it == imlSegment->raInfo.linkedList_perVirtualRegister.end())
        return nullptr;
    return it->second;
}
struct raFixedRegRequirementWithVGPR
{
    raFixedRegRequirementWithVGPR(raInstructionEdge pos, IMLPhysRegisterSet allowedReg, IMLRegID regId)
        : pos(pos), allowedReg(allowedReg), regId(regId) {}

    raInstructionEdge pos;
    IMLPhysRegisterSet allowedReg;
    IMLRegID regId;
};
std::vector<raFixedRegRequirementWithVGPR> IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment)
{
    std::vector<raFixedRegRequirementWithVGPR> frrList;
    size_t index = 0;
    while (index < imlSegment->imlList.size())
    {
        IMLFixedRegisters fixedRegs;
        GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
        raInstructionEdge pos;
        pos.Set(index, true);
        for (auto& fixedRegAccess : fixedRegs.listInput)
        {
            frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
        }
        pos = pos + 1;
        for (auto& fixedRegAccess : fixedRegs.listOutput)
        {
            frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
        }
        index++;
    }
    return frrList;
}
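// Note on raInstructionEdge encoding (inferred from the arithmetic used throughout
// this file, e.g. the imlList.size() * 2 bounds and pos.Set(index, true/false)):
// every instruction contributes two positions, an input edge and an output edge,
// so the raw value of an edge effectively advances by 2 per instruction.
// pos.Set(index, true) above selects the input edge of an instruction, and
// pos + 1 its output edge.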
boost::container::small_vector<raLivenessRange*, 8> IMLRA_GetRangeWithFixedRegReservationOverlappingPos(IMLSegment* imlSegment, raInstructionEdge pos, IMLPhysReg physReg)
{
    boost::container::small_vector<raLivenessRange*, 8> rangeList;
    for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
    {
        if (!currentRange->interval.ContainsEdge(pos))
            continue;
        IMLPhysRegisterSet allowedRegs;
        if (!currentRange->GetAllowedRegistersEx(allowedRegs))
            continue;
        if (allowedRegs.IsAvailable(physReg))
            rangeList.emplace_back(currentRange);
    }
    return rangeList;
}
void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
    // first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border
    // todo - this pass currently creates suboptimal results by splitting all ranges that cross the segment border if they have any fixed register requirement. This can be avoided in some cases
    for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;)
    {
        IMLPhysRegisterSet allowedRegs;
        if (currentRange->list_fixedRegRequirements.empty())
        {
            currentRange = currentRange->link_allSegmentRanges.next;
            continue; // since we run this pass for every segment we don't need to do global checks here for clusters which may not even have fixed register requirements
        }
        if (!currentRange->GetAllowedRegistersEx(allowedRegs))
        {
            currentRange = currentRange->link_allSegmentRanges.next;
            continue;
        }
        if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment())
        {
            raLivenessRange* nextRange = currentRange->link_allSegmentRanges.next;
            IMLRA_ExplodeRangeCluster(ppcImlGenContext, currentRange);
            currentRange = nextRange;
            continue;
        }
        currentRange = currentRange->link_allSegmentRanges.next;
    }
    // second pass - look for ranges with conflicting fixed register requirements and split these too (locally)
    for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
    {
        IMLPhysRegisterSet allowedRegs;
        if (currentRange->list_fixedRegRequirements.empty())
            continue; // we don't need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
        if (!currentRange->GetAllowedRegistersEx(allowedRegs))
            continue;
        if (allowedRegs.HasAnyAvailable())
            continue;
        cemu_assert_unimplemented();
    }
    // third pass - assign fixed registers, split ranges if needed
    std::vector<raFixedRegRequirementWithVGPR> frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment);
    std::unordered_map<IMLPhysReg, IMLRegID> lastVGPR;
    for (size_t i = 0; i < frr.size(); i++)
    {
        raFixedRegRequirementWithVGPR& entry = frr[i];
        // we currently only handle fixed register requirements with a single register
        // with one exception: When regId is IMLRegID_INVALID then the entry acts as a list of reserved registers
        cemu_assert_debug(entry.regId == IMLRegID_INVALID || entry.allowedReg.HasExactlyOneAvailable());
        for (IMLPhysReg physReg = entry.allowedReg.GetFirstAvailableReg(); physReg >= 0; physReg = entry.allowedReg.GetNextAvailableReg(physReg + 1))
        {
            // check if the assigned vGPR has changed
            bool vgprHasChanged = false;
            auto it = lastVGPR.find(physReg);
            if (it != lastVGPR.end())
                vgprHasChanged = it->second != entry.regId;
            else
                vgprHasChanged = true;
            lastVGPR[physReg] = entry.regId;

            if (!vgprHasChanged)
                continue;

            boost::container::small_vector<raLivenessRange*, 8> overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg);
            if (entry.regId != IMLRegID_INVALID)
                cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers

            for (auto& range : overlappingRanges)
            {
                if (range->interval.start < entry.pos)
                {
                    IMLRA_SplitRange(ppcImlGenContext, range, entry.pos, true);
                }
            }
        }
    }
    // finally iterate ranges and assign fixed registers
    for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
    {
        IMLPhysRegisterSet allowedRegs;
        if (currentRange->list_fixedRegRequirements.empty())
            continue; // we don't need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
        if (!currentRange->GetAllowedRegistersEx(allowedRegs))
        {
            cemu_assert_debug(currentRange->list_fixedRegRequirements.empty());
            continue;
        }
        cemu_assert_debug(allowedRegs.HasExactlyOneAvailable());
        currentRange->SetPhysicalRegister(allowedRegs.GetFirstAvailableReg());
    }
    // DEBUG - check for collisions and make sure all ranges with fixed register requirements got their physical register assigned
#if DEBUG_RA_EXTRA_VALIDATION
    for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
    {
        IMLPhysRegisterSet allowedRegs;
        if (!currentRange->HasPhysicalRegister())
            continue;
        for (raLivenessRange* currentRange2 = imlSegment->raInfo.linkedList_allSubranges; currentRange2; currentRange2 = currentRange2->link_allSegmentRanges.next)
        {
            if (currentRange == currentRange2)
                continue;
            if (currentRange->interval.IsOverlapping(currentRange2->interval))
            {
                cemu_assert_debug(currentRange->GetPhysicalRegister() != currentRange2->GetPhysicalRegister());
            }
        }
    }
#endif
}
// we should not split ranges on instructions with tied registers (i.e. where a register encoded as a single parameter is both input and output)
// otherwise the RA algorithm has to assign both ranges the same physical register (not supported yet) and the point of splitting to fit another range is nullified
void IMLRA_MakeSafeSplitPosition(IMLSegment* imlSegment, raInstructionEdge& pos)
{
    // we ignore the instruction for now and just always make it a safe split position
    cemu_assert_debug(pos.IsInstructionIndex());
    if (pos.IsOnOutputEdge())
        pos = pos - 1;
}
// convenience wrapper for IMLRA_MakeSafeSplitPosition
void IMLRA_MakeSafeSplitDistance(IMLSegment* imlSegment, raInstructionEdge startPos, sint32& distance)
{
    cemu_assert_debug(startPos.IsInstructionIndex());
    cemu_assert_debug(distance >= 0);
    raInstructionEdge endPos = startPos + distance;
    IMLRA_MakeSafeSplitPosition(imlSegment, endPos);
    if (endPos < startPos)
    {
        distance = 0;
        return;
    }
    distance = endPos.GetRaw() - startPos.GetRaw();
}
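// Worked example (assuming the edge encoding described above): for startPos on the
// input edge of instruction 5 and distance 3, endPos lands on the output edge of
// instruction 6; MakeSafeSplitPosition rounds it back to that instruction's input
// edge (pos - 1), so the returned distance shrinks from 3 to 2. If the rounded-down
// end would precede startPos, the distance collapses to 0.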
static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx);

class RASpillStrategy
{
public:
    virtual void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) = 0;

    sint32 GetCost()
    {
        return strategyCost;
    }

protected:
    void ResetCost()
    {
        strategyCost = INT_MAX;
    }

    sint32 strategyCost;
};
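// All strategies follow the same protocol: Reset() clears state and sets the cost
// to INT_MAX, Evaluate() inspects the timeline and records the cheapest applicable
// candidate, and the caller compares GetCost() across strategies before invoking
// Apply() on the winner (see the strategy selection loop in
// IMLRA_AssignSegmentRegisters below).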
class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy
{
public:
    void Reset()
    {
        localRangeHoleCutting.distance = -1;
        localRangeHoleCutting.largestHoleSubrange = nullptr;
        ResetCost();
    }

    void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
    {
        raInstructionEdge currentRangeStart = currentRange->interval.start;
        sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
        cemu_assert_debug(localRangeHoleCutting.distance == -1);
        cemu_assert_debug(strategyCost == INT_MAX);
        if (!currentRangeStart.ConnectsToPreviousSegment())
        {
            cemu_assert_debug(currentRangeStart.GetRaw() >= 0);
            for (auto candidate : timeline.activeRanges)
            {
                if (candidate->interval.ExtendsIntoNextSegment())
                    continue;
                // new checks (Oct 2024):
                if (candidate == currentRange)
                    continue;
                if (candidate->GetPhysicalRegister() < 0)
                    continue;
                if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
                    continue;

                sint32 distance2 = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart);
                IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance2);
                if (distance2 < 2)
                    continue;
                cemu_assert_debug(currentRangeStart.IsInstructionIndex());
                distance2 = std::min<sint32>(distance2, imlSegment->imlList.size() * 2 - currentRangeStart.GetRaw()); // limit distance to end of segment
                // calculate split cost of candidate
                sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2);
                // calculate additional split cost of currentRange if hole is not large enough
                if (distance2 < requiredSize2)
                {
                    cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2);
                    // we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes)
                    cost += (requiredSize2 - distance2) / 10;
                }
                // compare cost with previous candidates
                if (cost < strategyCost)
                {
                    strategyCost = cost;
                    localRangeHoleCutting.distance = distance2;
                    localRangeHoleCutting.largestHoleSubrange = candidate;
                }
            }
        }
    }

    void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
    {
        cemu_assert_debug(strategyCost != INT_MAX);
        sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
        raInstructionEdge currentRangeStart = currentRange->interval.start;

        raInstructionEdge holeStartPosition = currentRangeStart;
        raInstructionEdge holeEndPosition = currentRangeStart + localRangeHoleCutting.distance;
        raLivenessRange* collisionRange = localRangeHoleCutting.largestHoleSubrange;

        if (collisionRange->interval.start < holeStartPosition)
        {
            collisionRange = IMLRA_SplitRange(nullptr, collisionRange, holeStartPosition, true);
            cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeStartPosition); // verify if splitting worked at all, tail must be on or after the split point
            cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough
        }
        else
        {
            cemu_assert_unimplemented(); // we still need to trim?
        }
        // we may also have to cut the current range to fit partially into the hole
        if (requiredSize2 > localRangeHoleCutting.distance)
        {
            raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + localRangeHoleCutting.distance, true);
            if (tailRange)
            {
                cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
                tailRange->UnsetPhysicalRegister();
            }
        }
        // verify that the hole is large enough
        if (collisionRange)
        {
            cemu_assert_debug(!collisionRange->interval.IsOverlapping(currentRange->interval));
        }
    }

private:
    struct
    {
        sint32 distance;
        raLivenessRange* largestHoleSubrange;
    } localRangeHoleCutting;
};
class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy
{
    // split current range (this is generally only a good choice when the current range is long but has few usages)
public:
    void Reset()
    {
        ResetCost();
        availableRegisterHole.distance = -1;
        availableRegisterHole.physRegister = -1;
    }

    void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& localAvailableRegsMask, const IMLPhysRegisterSet& allowedRegs)
    {
        sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();

        raInstructionEdge currentRangeStart = currentRange->interval.start;
        cemu_assert_debug(strategyCost == INT_MAX);
        availableRegisterHole.distance = -1;
        availableRegisterHole.physRegister = -1;
        if (currentRangeStart.GetRaw() >= 0)
        {
            if (localAvailableRegsMask.HasAnyAvailable())
            {
                sint32 physRegItr = -1;
                while (true)
                {
                    physRegItr = localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1);
                    if (physRegItr < 0)
                        break;
                    if (!allowedRegs.IsAvailable(physRegItr))
                        continue;
                    // get size of potential hole for this register
                    sint32 distance = PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(imlSegment, currentRangeStart, physRegItr);

                    // some instructions may require the same register for another range, check the distance here
                    sint32 distUntilFixedReg = IMLRA_CountDistanceUntilFixedRegUsage(imlSegment, currentRangeStart, distance, currentRange->GetVirtualRegister(), physRegItr);
                    if (distUntilFixedReg < distance)
                        distance = distUntilFixedReg;

                    IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance);
                    if (distance < 2)
                        continue;
                    // calculate additional cost due to split
                    cemu_assert_debug(distance < requiredSize2); // should always be true otherwise previous step would have selected this register?
                    sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
                    // add small additional cost for the remaining range (prefer larger holes)
                    cost += ((requiredSize2 - distance) / 2) / 10;
                    if (cost < strategyCost)
                    {
                        strategyCost = cost;
                        availableRegisterHole.distance = distance;
                        availableRegisterHole.physRegister = physRegItr;
                    }
                }
            }
        }
    }

    void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
    {
        cemu_assert_debug(strategyCost != INT_MAX);
        raInstructionEdge currentRangeStart = currentRange->interval.start;
        // use available register
        raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + availableRegisterHole.distance, true);
        if (tailRange)
        {
            cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
            tailRange->UnsetPhysicalRegister();
        }
    }

private:
    struct
    {
        sint32 physRegister;
        sint32 distance; // size of hole
    } availableRegisterHole;
};
class RASpillStrategy_ExplodeRange : public RASpillStrategy
{
public:
    void Reset()
    {
        ResetCost();
        explodeRange.range = nullptr;
        explodeRange.distance = -1;
    }

    void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
    {
        raInstructionEdge currentRangeStart = currentRange->interval.start;
        if (currentRangeStart.ConnectsToPreviousSegment())
            currentRangeStart.Set(0, true);
        sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
        cemu_assert_debug(strategyCost == INT_MAX);
        explodeRange.range = nullptr;
        explodeRange.distance = -1;
        for (auto candidate : timeline.activeRanges)
        {
            if (!candidate->interval.ExtendsIntoNextSegment())
                continue;
            // new checks (Oct 2024):
            if (candidate == currentRange)
                continue;
            if (candidate->GetPhysicalRegister() < 0)
                continue;
            if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
                continue;

            sint32 distance = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart);
            IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance);
            if (distance < 2)
                continue;
            sint32 cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate);
            // if the hole is not large enough, add cost of splitting current subrange
            if (distance < requiredSize2)
            {
                cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
                // add small additional cost for the remaining range (prefer larger holes)
                cost += ((requiredSize2 - distance) / 2) / 10;
            }
            // compare with current best candidate for this strategy
            if (cost < strategyCost)
            {
                strategyCost = cost;
                explodeRange.distance = distance;
                explodeRange.range = candidate;
            }
        }
    }

    void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
    {
        raInstructionEdge currentRangeStart = currentRange->interval.start;
        if (currentRangeStart.ConnectsToPreviousSegment())
            currentRangeStart.Set(0, true);
        sint32 requiredSize2 = currentRange->interval.GetPreciseDistance();
        // explode range
        IMLRA_ExplodeRangeCluster(nullptr, explodeRange.range);
        // split current subrange if necessary
        if (requiredSize2 > explodeRange.distance)
        {
            raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + explodeRange.distance, true);
            if (tailRange)
            {
                cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
                tailRange->UnsetPhysicalRegister();
            }
        }
    }

private:
    struct
    {
        raLivenessRange* range;
        sint32 distance; // size of hole
        // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange
    } explodeRange;
};
class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy
{
public:
    void Reset()
    {
        ResetCost();
        explodeRange.range = nullptr;
        explodeRange.distance = -1;
    }

    void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
    {
        // explode the range with the least cost
        cemu_assert_debug(strategyCost == INT_MAX);
        cemu_assert_debug(explodeRange.range == nullptr && explodeRange.distance == -1);
        for (auto candidate : timeline.activeRanges)
        {
            if (!candidate->interval.ExtendsIntoNextSegment())
                continue;
            // only select candidates that clash with current subrange
            if (candidate->GetPhysicalRegister() < 0 && candidate != currentRange)
                continue;
            // and also filter any that don't meet fixed register requirements
            if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
                continue;
            sint32 cost;
            cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate);
            // compare with current best candidate for this strategy
            if (cost < strategyCost)
            {
                strategyCost = cost;
                explodeRange.distance = INT_MAX;
                explodeRange.range = candidate;
            }
        }
        // add current range as a candidate too
        sint32 ownCost;
        ownCost = IMLRA_CalculateAdditionalCostOfRangeExplode(currentRange);
        if (ownCost < strategyCost)
        {
            strategyCost = ownCost;
            explodeRange.distance = INT_MAX;
            explodeRange.range = currentRange;
        }
    }

    void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
    {
        cemu_assert_debug(strategyCost != INT_MAX);
        IMLRA_ExplodeRangeCluster(ctx, explodeRange.range);
    }

private:
    struct
    {
        raLivenessRange* range;
        sint32 distance; // size of hole
        // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange
    } explodeRange;
};
// filter any registers from candidatePhysRegSet which cannot be used by currentRange due to fixed register requirements within the range that it occupies
void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocatorContext& ctx, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet)
{
    IMLSegment* seg = currentRange->imlSegment;
    if (seg->imlList.empty())
        return; // there can be no fixed register requirements if there are no instructions

    raInstructionEdge firstPos = currentRange->interval.start;
    if (currentRange->interval.start.ConnectsToPreviousSegment())
        firstPos.SetRaw(0);
    else if (currentRange->interval.start.ConnectsToNextSegment())
        firstPos.Set(seg->imlList.size() - 1, false);

    raInstructionEdge lastPos = currentRange->interval.end;
    if (currentRange->interval.end.ConnectsToPreviousSegment())
        lastPos.SetRaw(0);
    else if (currentRange->interval.end.ConnectsToNextSegment())
        lastPos.Set(seg->imlList.size() - 1, false);
    cemu_assert_debug(firstPos <= lastPos);

    IMLRegID ourRegId = currentRange->GetVirtualRegister();

    IMLFixedRegisters fixedRegs;
    if (firstPos.IsOnOutputEdge())
        GetInstructionFixedRegisters(seg->imlList.data() + firstPos.GetInstructionIndex(), fixedRegs);
    for (raInstructionEdge currentPos = firstPos; currentPos <= lastPos; ++currentPos)
    {
        if (currentPos.IsOnInputEdge())
        {
            GetInstructionFixedRegisters(seg->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs);
        }
        auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
        for (auto& fixedRegLoc : fixedRegAccess)
        {
            if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
                candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet);
        }
    }
}
// filter out any registers along the range cluster
void IMLRA_FilterReservedFixedRegisterRequirementsForCluster(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet)
{
    cemu_assert_debug(currentRange->imlSegment == imlSegment);
    if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment())
    {
        auto clusterRanges = currentRange->GetAllSubrangesInCluster();
        for (auto& rangeIt : clusterRanges)
        {
            IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, rangeIt, candidatePhysRegSet);
            if (!candidatePhysRegSet.HasAnyAvailable())
                break;
        }
        return;
    }
    IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, currentRange, candidatePhysRegSet);
}
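// Performs one linear-scan pass over the segment. Returns true if every subrange
// received a physical register; returns false as soon as a spill strategy had to
// be applied, in which case the caller (IMLRA_AssignRegisters) restarts the
// allocation loop on the modified ranges.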
bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
    // sort subranges ascending by start index
    _sortSegmentAllSubrangesLinkedList(imlSegment);

    IMLRALivenessTimeline livenessTimeline;
    raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
    raInstructionEdge lastInstructionEdge;
    lastInstructionEdge.SetRaw(RA_INTER_RANGE_END);

    struct
    {
        RASpillStrategy_LocalRangeHoleCutting localRangeHoleCutting;
        RASpillStrategy_AvailableRegisterHole availableRegisterHole;
        RASpillStrategy_ExplodeRange explodeRange;
        // for ranges that connect to follow-up segments:
        RASpillStrategy_ExplodeRangeInter explodeRangeInter;
    } strategy;

    while (subrangeItr)
    {
        raInstructionEdge currentRangeStart = subrangeItr->interval.start; // used to be currentIndex before refactor
        PPCRecRA_debugValidateSubrange(subrangeItr);

        livenessTimeline.ExpireRanges((currentRangeStart > lastInstructionEdge) ? lastInstructionEdge : currentRangeStart); // expire up to currentIndex (inclusive), but exclude infinite ranges

        // if subrange already has a register assigned then add it to the active list and continue
        if (subrangeItr->GetPhysicalRegister() >= 0)
        {
            // verify if register is actually available
#if DEBUG_RA_EXTRA_VALIDATION
            for (auto& liverangeItr : livenessTimeline.activeRanges)
            {
                // check for register mismatch
                cemu_assert_debug(liverangeItr->GetPhysicalRegister() != subrangeItr->GetPhysicalRegister());
            }
#endif
            livenessTimeline.AddActiveRange(subrangeItr);
            subrangeItr = subrangeItr->link_allSegmentRanges.next;
            continue;
        }
        // ranges with fixed register requirements should already have a phys register assigned
        if (!subrangeItr->list_fixedRegRequirements.empty())
        {
            cemu_assert_debug(subrangeItr->HasPhysicalRegister());
        }
        // find free register for current subrangeItr and segment
        IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->GetVirtualRegister());
        IMLPhysRegisterSet candidatePhysRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat);
        cemu_assert_debug(candidatePhysRegSet.HasAnyAvailable()); // no valid pool provided for this register type

        IMLPhysRegisterSet allowedRegs = subrangeItr->GetAllowedRegisters(candidatePhysRegSet);
        cemu_assert_debug(allowedRegs.HasAnyAvailable()); // if zero regs are available, then this range needs to be split to avoid mismatching register requirements (do this in the initial pass to keep the code here simpler)
        candidatePhysRegSet &= allowedRegs;

        for (auto& liverangeItr : livenessTimeline.activeRanges)
        {
            cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0);
            candidatePhysRegSet.SetReserved(liverangeItr->GetPhysicalRegister());
        }
        // check intersections with other ranges and determine allowed registers
        IMLPhysRegisterSet localAvailableRegsMask = candidatePhysRegSet; // mask of registers that are currently not used (does not include range checks in other segments)
        if (candidatePhysRegSet.HasAnyAvailable())
        {
            // check for overlaps on a global scale (subrangeItr can be part of a larger range cluster across multiple segments)
            PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, candidatePhysRegSet);
        }
        // some target instructions may enforce specific registers (e.g. common on X86 where something like SHL <reg>, CL forces CL as the count register)
        // we determine the list of allowed registers here
        // this really only works if we assume single-register requirements (otherwise it's better not to filter out early and instead allow register corrections later, but we don't support this yet)
        if (candidatePhysRegSet.HasAnyAvailable())
        {
            IMLRA_FilterReservedFixedRegisterRequirementsForCluster(ctx, imlSegment, subrangeItr, candidatePhysRegSet);
        }
        if (candidatePhysRegSet.HasAnyAvailable())
        {
            // use free register
            subrangeItr->SetPhysicalRegisterForCluster(candidatePhysRegSet.GetFirstAvailableReg());
            livenessTimeline.AddActiveRange(subrangeItr);
            subrangeItr = subrangeItr->link_allSegmentRanges.next; // next
            continue;
        }
        // there is no free register for the entire range
        // evaluate different strategies of splitting ranges to free up another register or shorten the current range
        strategy.localRangeHoleCutting.Reset();
        strategy.availableRegisterHole.Reset();
        strategy.explodeRange.Reset();
        // can't assign register
        // there might be registers available, we just can't use them due to range conflicts
        RASpillStrategy* selectedStrategy = nullptr;
        auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy) {
            if (newStrategy.GetCost() == INT_MAX)
                return;
            if (selectedStrategy == nullptr || newStrategy.GetCost() < selectedStrategy->GetCost())
                selectedStrategy = &newStrategy;
        };

        if (!subrangeItr->interval.ExtendsIntoNextSegment())
        {
            // range ends in current segment, use local strategies
            // evaluate strategy: Cut hole into local subrange
            strategy.localRangeHoleCutting.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
            SelectStrategyIfBetter(strategy.localRangeHoleCutting);
            // evaluate strategy: Split current range to fit in available holes
            // todo - are checks required to avoid splitting on the suffix instruction?
            strategy.availableRegisterHole.Evaluate(imlSegment, subrangeItr, livenessTimeline, localAvailableRegsMask, allowedRegs);
            SelectStrategyIfBetter(strategy.availableRegisterHole);
            // evaluate strategy: Explode inter-segment ranges
            strategy.explodeRange.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
            SelectStrategyIfBetter(strategy.explodeRange);
        }
        else // if subrangeItr->interval.ExtendsIntoNextSegment()
        {
            strategy.explodeRangeInter.Reset();
            strategy.explodeRangeInter.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
            SelectStrategyIfBetter(strategy.explodeRangeInter);
        }
        // choose strategy
        if (selectedStrategy)
        {
            selectedStrategy->Apply(ppcImlGenContext, imlSegment, subrangeItr);
        }
        else
        {
            // none of the evaluated strategies can be applied, this should only happen if the segment extends into the next segment(s) for which we have no good strategy
            cemu_assert_debug(subrangeItr->interval.ExtendsPreviousSegment());
            // alternative strategy if we have no other choice: explode current range
            IMLRA_ExplodeRangeCluster(ppcImlGenContext, subrangeItr);
        }
        return false;
    }
    return true;
}
void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext)
{
    // start with frequently executed segments first
    sint32 maxLoopDepth = 0;
    for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
    {
        maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth);
    }
    // assign fixed registers first
    for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
        IMLRA_HandleFixedRegisters(ppcImlGenContext, segIt);
#if DEBUG_RA_EXTRA_VALIDATION
    // fixed registers are currently handled per-segment, but here we validate that they are assigned correctly on a global scope as well
    for (IMLSegment* imlSegment : ppcImlGenContext->segmentList2)
    {
        for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
        {
            IMLPhysRegisterSet allowedRegs;
            if (!currentRange->GetAllowedRegistersEx(allowedRegs))
            {
                cemu_assert_debug(currentRange->list_fixedRegRequirements.empty());
                continue;
            }
            cemu_assert_debug(currentRange->HasPhysicalRegister() && allowedRegs.IsAvailable(currentRange->GetPhysicalRegister()));
        }
    }
#endif

    while (true)
    {
        bool done = false;
        for (sint32 d = maxLoopDepth; d >= 0; d--)
        {
            for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
            {
                if (segIt->loopDepth != d)
                    continue;
                done = IMLRA_AssignSegmentRegisters(ctx, ppcImlGenContext, segIt);
                if (done == false)
                    break;
            }
            if (done == false)
                break;
        }
        if (done)
            break;
    }
}
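// Example of the reshaping below: given a conditional-branch segment S whose
// fall-through successor T has multiple predecessors, an empty segment E is
// spliced in so that the flow becomes S -> E -> T. Spill/reload instructions for
// values live on the fall-through path can then be placed in E without affecting
// T's other predecessors.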
void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext)
{
    // insert empty segments after every non-taken branch if the linked segment has more than one input
    // this gives the register allocator more room to create efficient spill code
    size_t segmentIndex = 0;
    while (segmentIndex < ppcImlGenContext->segmentList2.size())
    {
        IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex];
        if (imlSegment->nextSegmentIsUncertain)
        {
            segmentIndex++;
            continue;
        }
        if (imlSegment->nextSegmentBranchTaken == nullptr || imlSegment->nextSegmentBranchNotTaken == nullptr)
        {
            segmentIndex++;
            continue;
        }
        if (imlSegment->nextSegmentBranchNotTaken->list_prevSegments.size() <= 1)
        {
            segmentIndex++;
            continue;
        }
        if (imlSegment->nextSegmentBranchNotTaken->isEnterable)
        {
            segmentIndex++;
            continue;
        }
        PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1);
        IMLSegment* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0];
        IMLSegment* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1];
        IMLSegment* nextSegment = imlSegment->nextSegmentBranchNotTaken;
        IMLSegment_RemoveLink(imlSegmentP0, nextSegment);
        IMLSegment_SetLinkBranchNotTaken(imlSegmentP1, nextSegment);
        IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1);
        segmentIndex++;
    }
    // detect loops
    for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
    {
        IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s];
        imlSegment->momentaryIndex = s;
    }
    for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
    {
        IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s];
        IMLRA_IdentifyLoop(ppcImlGenContext, imlSegment);
    }
}
IMLRARegAbstractLiveness* _GetAbstractRange(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
{
    auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
    auto it = segMap.find(regId);
    return it != segMap.end() ? &it->second : nullptr;
}
// scan instructions and establish register usage range for segment
void IMLRA_CalculateSegmentMinMaxAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
    size_t instructionIndex = 0;
    IMLUsedRegisters gprTracking;
    auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
    while (instructionIndex < imlSegment->imlList.size())
    {
        imlSegment->imlList[instructionIndex].CheckRegisterUsage(&gprTracking);
        gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) {
            IMLRegID gprId = gprReg.GetRegID();
            auto it = segDistMap.find(gprId);
            if (it == segDistMap.end())
            {
                segDistMap.try_emplace(gprId, gprReg.GetBaseFormat(), (sint32)instructionIndex, (sint32)instructionIndex + 1);
                ctx.regIdToBaseFormat.try_emplace(gprId, gprReg.GetBaseFormat());
            }
            else
            {
                it->second.TrackInstruction(instructionIndex);
#ifdef CEMU_DEBUG_ASSERT
                cemu_assert_debug(ctx.regIdToBaseFormat[gprId] == gprReg.GetBaseFormat()); // the base type per register always has to be the same
#endif
            }
        });
        instructionIndex++;
    }
}
void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
{
    // for each register calculate min/max index of usage range within each segment
    size_t dbgIndex = 0;
    for (IMLSegment* segIt : ctx.deprGenContext->segmentList2)
    {
        cemu_assert_debug(segIt->momentaryIndex == dbgIndex);
        IMLRA_CalculateSegmentMinMaxAbstractRanges(ctx, segIt);
        dbgIndex++;
    }
}
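// Converts the per-segment abstract liveness entries into concrete raLivenessRange
// objects. RA_INTER_RANGE_START/END mark ranges that enter from a predecessor or
// extend past the segment's last instruction; when a range crosses a segment
// boundary, the function recurses into the connected segments so that all pieces
// of the cluster are created and linked together (via subrangeBranchTaken/NotTaken
// and previousRanges).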
raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID vGPR, IMLName name)
{
    IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR);
    if (!abstractRange)
        return nullptr;
    if (abstractRange->isProcessed)
    {
        // return already existing segment
        raLivenessRange* existingRange = IMLRA_GetSubrange(imlSegment, vGPR);
        cemu_assert_debug(existingRange);
        return existingRange;
    }
    abstractRange->isProcessed = true;
    // create subrange
    cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr);
    cemu_assert_debug(
        (abstractRange->usageStart == abstractRange->usageEnd && (abstractRange->usageStart == RA_INTER_RANGE_START || abstractRange->usageStart == RA_INTER_RANGE_END)) ||
        abstractRange->usageStart < abstractRange->usageEnd); // usageEnd is exclusive so it should always be larger
    sint32 inclusiveEnd = abstractRange->usageEnd;
    if (inclusiveEnd != RA_INTER_RANGE_START && inclusiveEnd != RA_INTER_RANGE_END)
        inclusiveEnd--; // subtract one, because usageEnd is exclusive, but the end value of the interval passed to createSubrange is inclusive
    raInterval interval;
    interval.SetInterval(abstractRange->usageStart, true, inclusiveEnd, true);
    raLivenessRange* subrange = IMLRA_CreateRange(ctx.deprGenContext, imlSegment, vGPR, name, interval.start, interval.end);
    // traverse forward
    if (abstractRange->usageEnd == RA_INTER_RANGE_END)
    {
        if (imlSegment->nextSegmentBranchTaken)
        {
            IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR);
            if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START)
            {
                subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name);
                subrange->subrangeBranchTaken->previousRanges.push_back(subrange);
                cemu_assert_debug(subrange->subrangeBranchTaken->interval.ExtendsPreviousSegment());
            }
        }
        if (imlSegment->nextSegmentBranchNotTaken)
        {
            IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR);
            if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START)
            {
                subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name);
                subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange);
                cemu_assert_debug(subrange->subrangeBranchNotTaken->interval.ExtendsPreviousSegment());
            }
        }
    }
    // traverse backward
    if (abstractRange->usageStart == RA_INTER_RANGE_START)
    {
        for (auto& it : imlSegment->list_prevSegments)
        {
            IMLRARegAbstractLiveness* prevRange = _GetAbstractRange(ctx, it, vGPR);
            if (!prevRange)
                continue;
            if (prevRange->usageEnd == RA_INTER_RANGE_END)
                PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name);
        }
    }
    return subrange;
}
|
|
|
|
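// appends an access location to the range's position-sorted location list, skipping duplicate positions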
void IMLRA_UpdateOrAddSubrangeLocation(raLivenessRange* subrange, raInstructionEdge pos)
{
	if (subrange->list_accessLocations.empty())
	{
		subrange->list_accessLocations.emplace_back(pos);
		return;
	}
	if (subrange->list_accessLocations.back().pos == pos)
		return;
	cemu_assert_debug(subrange->list_accessLocations.back().pos < pos);
	subrange->list_accessLocations.emplace_back(pos);
}

// take the abstract range data and create LivenessRanges
void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
	const std::unordered_map<IMLRegID, raLivenessRange*>& regToSubrange = IMLRA_GetSubrangeMap(imlSegment);

	auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet) {
		raLivenessRange* subrange = regToSubrange.find(regId)->second;
		cemu_assert_debug(subrange);
		raFixedRegRequirement tmp;
		tmp.pos.Set(instructionIndex, isInput);
		tmp.allowedReg = physRegSet;
		if (subrange->list_fixedRegRequirements.empty() || subrange->list_fixedRegRequirements.back().pos != tmp.pos)
			subrange->list_fixedRegRequirements.push_back(tmp);
	};

	// convert abstract min-max ranges to liveness range objects
	auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
	for (auto& it : segMap)
	{
		if (it.second.isProcessed)
			continue;
		IMLRegID regId = it.first;
		PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, ctx.raParam->regIdToName.find(regId)->second);
	}
	// fill the created ranges with read/write location indices
	// note that at this point there is only one range per register per segment and the algorithm below relies on this
	size_t index = 0;
	IMLUsedRegisters gprTracking;
	while (index < imlSegment->imlList.size())
	{
		imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
		raInstructionEdge pos((sint32)index, true); // reads are attributed to the instruction's input edge
		gprTracking.ForEachReadGPR([&](IMLReg gprReg) {
			IMLRegID gprId = gprReg.GetRegID();
			raLivenessRange* subrange = regToSubrange.find(gprId)->second;
			IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
		});
		pos = {(sint32)index, false}; // writes are attributed to the instruction's output edge
		gprTracking.ForEachWrittenGPR([&](IMLReg gprReg) {
			IMLRegID gprId = gprReg.GetRegID();
			raLivenessRange* subrange = regToSubrange.find(gprId)->second;
			IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
		});
		// check fixed register requirements
		IMLFixedRegisters fixedRegs;
		GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
		for (auto& fixedRegAccess : fixedRegs.listInput)
		{
			if (fixedRegAccess.reg != IMLREG_INVALID)
				AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet);
		}
		for (auto& fixedRegAccess : fixedRegs.listOutput)
		{
			if (fixedRegAccess.reg != IMLREG_INVALID)
				AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet);
		}
		index++;
	}
}

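// extends the abstract range of a register to the end of the segment. If the register has no range in
// this segment yet, a new one is created which covers at most the suffix instruction.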
void IMLRA_extendAbstractRangeToEndOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
{
	auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
	auto it = segDistMap.find(regId);
	if (it == segDistMap.end())
	{
		sint32 startIndex;
		if (imlSegment->HasSuffixInstruction())
			startIndex = imlSegment->GetSuffixInstructionIndex();
		else
			startIndex = RA_INTER_RANGE_END;
		segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, startIndex, RA_INTER_RANGE_END);
	}
	else
	{
		it->second.usageEnd = RA_INTER_RANGE_END;
	}
}

void IMLRA_extendAbstractRangeToBeginningOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
{
	auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
	auto it = segDistMap.find(regId);
	if (it == segDistMap.end())
	{
		segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, RA_INTER_RANGE_START, RA_INTER_RANGE_START);
	}
	else
	{
		it->second.usageStart = RA_INTER_RANGE_START;
	}
	// propagate backwards
	for (auto& it : imlSegment->list_prevSegments)
	{
		IMLRA_extendAbstractRangeToEndOfSegment(ctx, it, regId);
	}
}

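// makes the abstract ranges of a register contiguous along a route of segments
// (route[0] is the origin, route[routeDepth - 1] the destination)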
void IMLRA_connectAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLRegID regId, IMLSegment** route, sint32 routeDepth)
{
#ifdef CEMU_DEBUG_ASSERT
	if (routeDepth < 2)
		assert_dbg();
#endif
	// extend starting range to end of segment
	IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[0], regId);
	// extend all the connecting segments in both directions
	for (sint32 i = 1; i < (routeDepth - 1); i++)
	{
		IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[i], regId);
		IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[i], regId);
	}
	// extend the final segment towards the beginning
	IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[routeDepth - 1], regId);
}

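// recursively follows the control flow for up to distanceLeft instructions. If another range of the same
// register is found within that budget, all abstract ranges along the discovered route are connected.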
void _IMLRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRegID regID, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth)
{
	if (routeDepth >= 64)
	{
		cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded\n");
		return;
	}
	route[routeDepth] = currentSegment;

	IMLRARegAbstractLiveness* range = _GetAbstractRange(ctx, currentSegment, regID);

	if (!range)
	{
		// measure distance over entire segment
		distanceLeft -= (sint32)currentSegment->imlList.size();
		if (distanceLeft > 0)
		{
			if (currentSegment->nextSegmentBranchNotTaken)
				_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, distanceLeft, route, routeDepth + 1);
			if (currentSegment->nextSegmentBranchTaken)
				_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, distanceLeft, route, routeDepth + 1);
		}
		return;
	}
	else
	{
		// measure distance to range
		if (range->usageStart == RA_INTER_RANGE_END)
		{
			if (distanceLeft < (sint32)currentSegment->imlList.size())
				return; // range too far away
		}
		else if (range->usageStart != RA_INTER_RANGE_START && range->usageStart > distanceLeft)
			return; // out of range
		// found close range -> connect ranges
		IMLRA_connectAbstractRanges(ctx, regID, route, routeDepth + 1);
	}
}

void PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRARegAbstractLiveness* range, IMLRegID regID)
{
	cemu_assert_debug(range->usageEnd >= 0);
	// count instructions to the end of the initial segment
	sint32 instructionsUntilEndOfSeg;
	if (range->usageEnd == RA_INTER_RANGE_END)
		instructionsUntilEndOfSeg = 0;
	else
		instructionsUntilEndOfSeg = (sint32)currentSegment->imlList.size() - range->usageEnd;
	cemu_assert_debug(instructionsUntilEndOfSeg >= 0);
	sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg; // heuristic: only consider ranges within roughly 45 instructions
	if (remainingScanDist <= 0)
		return; // scan distance is used up before reaching the end of the segment

	IMLSegment* route[64];
	route[0] = currentSegment;
	if (currentSegment->nextSegmentBranchNotTaken)
		_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, remainingScanDist, route, 1);
	if (currentSegment->nextSegmentBranchTaken)
		_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, remainingScanDist, route, 1);
}

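// tries to merge each range in this segment with nearby ranges in the successor segments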
void PPCRecRA_mergeCloseRangesForSegmentV2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
	auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
	for (auto& it : segMap)
	{
		PPCRecRA_checkAndTryExtendRange(ctx, imlSegment, &(it.second), it.first);
	}
#ifdef CEMU_DEBUG_ASSERT
	if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
		assert_dbg();
	if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain)
		assert_dbg();
#endif
}

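// breadth-first traversal over all segments reachable from imlSegment, merging close ranges per segment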
void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
	std::vector<IMLSegment*> list_segments;
	std::vector<bool> list_processedSegment;
	size_t segmentCount = ctx.deprGenContext->segmentList2.size();
	list_segments.reserve(segmentCount + 1);
	list_processedSegment.resize(segmentCount);

	auto markSegProcessed = [&list_processedSegment](IMLSegment* seg) {
		list_processedSegment[seg->momentaryIndex] = true;
	};
	auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool {
		return list_processedSegment[seg->momentaryIndex];
	};
	markSegProcessed(imlSegment);

	sint32 index = 0;
	list_segments.push_back(imlSegment);
	while (index < list_segments.size())
	{
		IMLSegment* currentSegment = list_segments[index];
		PPCRecRA_mergeCloseRangesForSegmentV2(ctx, currentSegment);
		// follow flow
		if (currentSegment->nextSegmentBranchNotTaken && !isSegProcessed(currentSegment->nextSegmentBranchNotTaken))
		{
			markSegProcessed(currentSegment->nextSegmentBranchNotTaken);
			list_segments.push_back(currentSegment->nextSegmentBranchNotTaken);
		}
		if (currentSegment->nextSegmentBranchTaken && !isSegProcessed(currentSegment->nextSegmentBranchTaken))
		{
			markSegProcessed(currentSegment->nextSegmentBranchTaken);
			list_segments.push_back(currentSegment->nextSegmentBranchTaken);
		}
		index++;
	}
}

void IMLRA_MergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx)
{
	for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
	{
		IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
		if (!imlSegment->list_prevSegments.empty())
			continue; // not an entry/standalone segment
		PPCRecRA_followFlowAndExtendRanges(ctx, imlSegment);
	}
}

void IMLRA_ExtendAbstractRangesOutOfLoops(IMLRegisterAllocatorContext& ctx)
{
	for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
	{
		IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
		auto localLoopDepth = imlSegment->loopDepth;
		if (localLoopDepth <= 0)
			continue; // not inside a loop
		// look for loop exit
		bool hasLoopExit = false;
		if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth)
		{
			hasLoopExit = true;
		}
		if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth)
		{
			hasLoopExit = true;
		}
		if (hasLoopExit == false)
			continue;

		// extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop)
		auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
		for (auto& it : segMap)
		{
			if (it.second.usageEnd != RA_INTER_RANGE_END)
				continue;
			if (imlSegment->nextSegmentBranchTaken)
				IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchTaken, it.first);
			if (imlSegment->nextSegmentBranchNotTaken)
				IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchNotTaken, it.first);
		}
	}
}

void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
{
	IMLRA_MergeCloseAbstractRanges(ctx);
	// extra pass to move register loads and stores out of loops
	IMLRA_ExtendAbstractRangesOutOfLoops(ctx);
	// calculate liveness ranges
	for (auto& segIt : ctx.deprGenContext->segmentList2)
		IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt);
}

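// derives the load/store behavior of a range: _noLoad is set when the register is fully overwritten before
// being read (or the value is live-in from a previous segment), hasStore when the range writes the register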
void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange)
{
	bool isRead = false;
	bool isWritten = false;
	bool isOverwritten = false;
	for (auto& location : subrange->list_accessLocations)
	{
		if (location.IsRead())
		{
			isRead = true;
		}
		if (location.IsWrite())
		{
			if (isRead == false)
				isOverwritten = true;
			isWritten = true;
		}
	}
	subrange->_noLoad = isOverwritten;
	subrange->hasStore = isWritten;

	if (subrange->interval.ExtendsPreviousSegment())
		subrange->_noLoad = true; // the value is live-in from a previous segment, no load necessary
}

struct subrangeEndingInfo_t
{
	raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE];
	sint32 subrangeCount;

	bool hasUndefinedEndings;
};

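// recursively collects the ranges in which the value's lifetime ends ("endings"). hasUndefinedEndings is
// set when the endings cannot be fully determined (depth limit hit, list full or flow leaves tracked ranges)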
void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info)
{
	if (depth >= 30)
	{
		info->hasUndefinedEndings = true;
		return;
	}
	if (subrange->lastIterationIndex == iterationIndex)
		return; // already processed
	subrange->lastIterationIndex = iterationIndex;
	if (subrange->hasStoreDelayed)
		return; // no need to traverse this subrange
	IMLSegment* imlSegment = subrange->imlSegment;
	if (!subrange->interval.ExtendsIntoNextSegment())
	{
		// ending segment
		if (info->subrangeCount >= SUBRANGE_LIST_SIZE)
		{
			info->hasUndefinedEndings = true;
			return;
		}
		else
		{
			info->subrangeList[info->subrangeCount] = subrange;
			info->subrangeCount++;
		}
		return;
	}

	// traverse next subranges in flow
	if (imlSegment->nextSegmentBranchNotTaken)
	{
		if (subrange->subrangeBranchNotTaken == nullptr)
		{
			info->hasUndefinedEndings = true;
		}
		else
		{
			_findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, iterationIndex, depth + 1, info);
		}
	}
	if (imlSegment->nextSegmentBranchTaken)
	{
		if (subrange->subrangeBranchTaken == nullptr)
		{
			info->hasUndefinedEndings = true;
		}
		else
		{
			_findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info);
		}
	}
}

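// checks whether the store of a range can be delayed into its ending ranges: if every ending already
// stores, the store here is dropped; otherwise it is only moved when the most expensive ending store
// does not cost more than storing in this segment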
static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange)
{
	if (!subrange->interval.ExtendsIntoNextSegment())
		return;
	// analyze data flow across segments (if this segment has writes)
	if (subrange->hasStore)
	{
		subrangeEndingInfo_t writeEndingInfo;
		writeEndingInfo.subrangeCount = 0;
		writeEndingInfo.hasUndefinedEndings = false;
		_findSubrangeWriteEndings(subrange, IMLRA_GetNextIterationIndex(), 0, &writeEndingInfo);
		if (writeEndingInfo.hasUndefinedEndings == false)
		{
			// get cost of delaying store into endings
			sint32 delayStoreCost = 0;
			bool alreadyStoredInAllEndings = true;
			for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
			{
				raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i];
				if (subrangeItr->hasStore)
					continue; // this ending already stores, no extra cost
				alreadyStoredInAllEndings = false;
				sint32 storeCost = IMLRA_GetSegmentReadWriteCost(subrangeItr->imlSegment);
				delayStoreCost = std::max(storeCost, delayStoreCost);
			}
			if (alreadyStoredInAllEndings)
			{
				subrange->hasStore = false;
				subrange->hasStoreDelayed = true;
			}
			else if (delayStoreCost <= IMLRA_GetSegmentReadWriteCost(subrange->imlSegment))
			{
				subrange->hasStore = false;
				subrange->hasStoreDelayed = true;
				for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
				{
					raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i];
					subrangeItr->hasStore = true;
				}
			}
		}
	}
}

void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext)
{
	// this function is called after _AssignRegisters(), which means that all liveness ranges are already final and must not be modified anymore
	// track read/write dependencies per segment
	for (auto& seg : ppcImlGenContext->segmentList2)
	{
		raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges;
		while (subrange)
		{
			IMLRA_AnalyzeSubrangeDataDependency(subrange);
			subrange = subrange->link_allSegmentRanges.next;
		}
	}
	// propagate information across segment boundaries
	for (auto& seg : ppcImlGenContext->segmentList2)
	{
		raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges;
		while (subrange)
		{
			IMLRA_AnalyzeRangeDataFlow(subrange);
			subrange = subrange->link_allSegmentRanges.next;
		}
	}
}

/* Generate move instructions */

inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId)
{
	return IMLReg(baseFormat, baseFormat, 0, regId);
}

// prepass for IMLRA_GenerateSegmentMoveInstructions2 which rewrites all virtual registers to their physical counterparts
void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
	std::unordered_map<IMLRegID, IMLRegID> virtId2PhysReg;
	boost::container::small_vector<raLivenessRange*, 64> activeRanges;
	raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
	raInstructionEdge currentEdge;
	for (size_t i = 0; i < imlSegment->imlList.size(); i++)
	{
		currentEdge.Set(i, false); // set to the instruction's output edge
		// activate ranges which begin before or during this instruction
		while (currentRange && currentRange->interval.start <= currentEdge)
		{
			cemu_assert_debug(virtId2PhysReg.find(currentRange->GetVirtualRegister()) == virtId2PhysReg.end() || virtId2PhysReg[currentRange->GetVirtualRegister()] == currentRange->GetPhysicalRegister()); // check for register conflict

			virtId2PhysReg[currentRange->GetVirtualRegister()] = currentRange->GetPhysicalRegister();
			activeRanges.push_back(currentRange);
			currentRange = currentRange->link_allSegmentRanges.next;
		}
		// rewrite registers
		imlSegment->imlList[i].RewriteGPR(virtId2PhysReg);
		// deactivate ranges which end during this instruction
		auto it = activeRanges.begin();
		while (it != activeRanges.end())
		{
			if ((*it)->interval.end <= currentEdge)
			{
				virtId2PhysReg.erase((*it)->GetVirtualRegister());
				it = activeRanges.erase(it);
			}
			else
				++it;
		}
	}
}

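// rewrites the segment to physical registers and inserts the name<->register move instructions implied by
// the liveness ranges. For example, a range without _noLoad gets a load (make_r_name) emitted right before
// its start and a range with hasStore gets a store (make_name_r) emitted right after its end.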
void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
	IMLRA_RewriteRegisters(ctx, imlSegment);

#if DEBUG_RA_INSTRUCTION_GEN
	cemuLog_log(LogType::Force, "");
	cemuLog_log(LogType::Force, "[Seg before RA]");
	IMLDebug_DumpSegment(nullptr, imlSegment, true);
#endif

	bool hadSuffixInstruction = imlSegment->HasSuffixInstruction();

	std::vector<IMLInstruction> rebuiltInstructions;
	sint32 numInstructionsWithoutSuffix = (sint32)imlSegment->imlList.size() - (imlSegment->HasSuffixInstruction() ? 1 : 0);

	if (imlSegment->imlList.empty())
	{
		// empty segments need special handling (todo - look into merging this with the core logic below eventually)
		// store all ranges
		raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
		while (currentRange)
		{
			if (currentRange->hasStore)
				rebuiltInstructions.emplace_back().make_name_r(currentRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()));
			currentRange = currentRange->link_allSegmentRanges.next;
		}
		// load ranges
		currentRange = imlSegment->raInfo.linkedList_allSubranges;
		while (currentRange)
		{
			if (!currentRange->_noLoad)
			{
				cemu_assert_debug(currentRange->interval.ExtendsIntoNextSegment());
				rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
			}
			currentRange = currentRange->link_allSegmentRanges.next;
		}
		imlSegment->imlList = std::move(rebuiltInstructions);
		return;
	}

	// make sure that no range exceeds the suffix instruction input edge except if they need to be loaded for the next segment (todo - for those, set the start point accordingly?)
	{
		raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
		raInstructionEdge edge;
		if (imlSegment->HasSuffixInstruction())
			edge.Set(numInstructionsWithoutSuffix, true);
		else
			edge.Set(numInstructionsWithoutSuffix - 1, false);

		while (currentRange)
		{
			if (!currentRange->interval.IsNextSegmentOnly() && currentRange->interval.end > edge)
			{
				currentRange->interval.SetEnd(edge);
			}
			currentRange = currentRange->link_allSegmentRanges.next;
		}
	}

#if DEBUG_RA_INSTRUCTION_GEN
	cemuLog_log(LogType::Force, "");
	cemuLog_log(LogType::Force, "--- Intermediate liveness info ---");
	{
		raLivenessRange* dbgRange = imlSegment->raInfo.linkedList_allSubranges;
		while (dbgRange)
		{
			cemuLog_log(LogType::Force, "Range i{}: {}-{}", dbgRange->GetVirtualRegister(), dbgRange->interval.start.GetDebugString(), dbgRange->interval.end.GetDebugString());
			dbgRange = dbgRange->link_allSegmentRanges.next;
		}
	}
#endif

	boost::container::small_vector<raLivenessRange*, 64> activeRanges;
	// first we add all the ranges that extend from the previous segment; some of these will end immediately at the first instruction so we might need to store them early
	raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
	// make all ranges active that start on RA_INTER_RANGE_START
	while (currentRange && currentRange->interval.start.ConnectsToPreviousSegment())
	{
		activeRanges.push_back(currentRange);
		currentRange = currentRange->link_allSegmentRanges.next;
	}
	// store all ranges that end before the first output edge (includes RA_INTER_RANGE_START)
	auto it = activeRanges.begin();
	raInstructionEdge firstOutputEdge;
	firstOutputEdge.Set(0, false);
	while (it != activeRanges.end())
	{
		if ((*it)->interval.end < firstOutputEdge)
		{
			raLivenessRange* storedRange = *it;
			if (storedRange->hasStore)
				rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister()));
			it = activeRanges.erase(it);
			continue;
		}
		++it;
	}

	sint32 numInstructions = (sint32)imlSegment->imlList.size();
	for (sint32 i = 0; i < numInstructions; i++)
	{
		raInstructionEdge curEdge;
		// input edge (raw edge positions appear to encode the input edge of instruction i as i*2 and the output edge as i*2+1)
		curEdge.SetRaw(i * 2 + 1); // +1 to also include ranges that start at the output of the instruction
		while (currentRange && currentRange->interval.start <= curEdge)
		{
			if (!currentRange->_noLoad)
			{
				rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
			}
			activeRanges.push_back(currentRange);
			currentRange = currentRange->link_allSegmentRanges.next;
		}
		// copy instruction
		rebuiltInstructions.push_back(imlSegment->imlList[i]);
		// output edge
		curEdge.SetRaw(i * 2 + 1 + 1);
		// ranges that end on the next input edge are stored here as well; this is handled by the extra +1 above
		auto it = activeRanges.begin();
		while (it != activeRanges.end())
		{
			if ((*it)->interval.end <= curEdge)
			{
				// range expires
				// todo - check hasStore
				raLivenessRange* storedRange = *it;
				if (storedRange->hasStore)
				{
					cemu_assert_debug(i != numInstructionsWithoutSuffix); // not allowed to emit after the suffix instruction
					rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister()));
				}
				it = activeRanges.erase(it);
				continue;
			}
			++it;
		}
	}
	// if there is no suffix instruction we currently need to handle the final loads here
	cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
	if (imlSegment->HasSuffixInstruction())
	{
		cemu_assert_debug(!currentRange); // all ranges should have been activated by this point
		for (auto& remainingRange : activeRanges)
		{
			cemu_assert_debug(!remainingRange->hasStore);
		}
	}
	else
	{
		for (auto& remainingRange : activeRanges)
		{
			cemu_assert_debug(!remainingRange->hasStore); // this range still needs to be stored
		}
		while (currentRange)
		{
			cemu_assert_debug(currentRange->interval.IsNextSegmentOnly());
			cemu_assert_debug(!currentRange->_noLoad);
			rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
			currentRange = currentRange->link_allSegmentRanges.next;
		}
	}

	imlSegment->imlList = std::move(rebuiltInstructions);
	cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());

#if DEBUG_RA_INSTRUCTION_GEN
	cemuLog_log(LogType::Force, "");
	cemuLog_log(LogType::Force, "[Seg after RA]");
	IMLDebug_DumpSegment(nullptr, imlSegment, false);
#endif
}

void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx)
{
	for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
	{
		IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
		IMLRA_GenerateSegmentMoveInstructions2(ctx, imlSegment);
	}
}

static void DbgVerifyFixedRegRequirements(IMLSegment* imlSegment)
{
#if DEBUG_RA_EXTRA_VALIDATION
	std::vector<raFixedRegRequirementWithVGPR> frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment);
	for (auto& fixedReq : frr)
	{
		for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
		{
			if (!range->interval.ContainsEdge(fixedReq.pos))
				continue;
			// verify that the requirement is honored
			if (range->GetVirtualRegister() == fixedReq.regId)
			{
				cemu_assert(range->HasPhysicalRegister());
				cemu_assert(fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register matches, but it was not assigned the right physical register
			}
			else
			{
				cemu_assert(!fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register does not match, but it is using the reserved physical register
			}
		}
	}
#endif
}

static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx)
{
#if DEBUG_RA_EXTRA_VALIDATION
	for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
	{
		IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
		raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
		while (subrangeItr)
		{
			PPCRecRA_debugValidateSubrange(subrangeItr);
			subrangeItr = subrangeItr->link_allSegmentRanges.next;
		}
	}
	// check that no range violates fixed register requirements
	for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
	{
		DbgVerifyFixedRegRequirements(ctx.deprGenContext->segmentList2[s]);
	}
#endif
}

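// register allocator entry point: reshapes the control flow graph, computes liveness ranges, assigns
// physical registers and finally emits the load/store moves between named and physical registers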
void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam)
{
	IMLRegisterAllocatorContext ctx;
	ctx.raParam = &raParam;
	ctx.deprGenContext = ppcImlGenContext;

	IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext);
	ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment
	ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size());
	IMLRA_CalculateLivenessRanges(ctx);
	IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx);
	IMLRA_AssignRegisters(ctx, ppcImlGenContext);
	DbgVerifyAllRanges(ctx);
	IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext);
	IMLRA_GenerateMoveInstructions(ctx);

	IMLRA_DeleteAllRanges(ppcImlGenContext);
}