diff --git a/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.cpp b/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.cpp
index 181ba4e7d8772..6a072354972ac 100644
--- a/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.cpp
+++ b/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.cpp
@@ -100,6 +100,25 @@ bool ArchitectureAArch64::ReconfigureRegisterInfo(DynamicRegisterInfo &reg_info,
     if (reg_value != fail_value && reg_value <= 32)
       svg_reg_value = reg_value;
   }
+  if (!svg_reg_value) {
+    const RegisterInfo *darwin_svg_reg_info = reg_info.GetRegisterInfo("svl");
+    if (darwin_svg_reg_info) {
+      uint32_t svg_reg_num = darwin_svg_reg_info->kinds[eRegisterKindLLDB];
+      uint64_t reg_value =
+          reg_context.ReadRegisterAsUnsigned(svg_reg_num, fail_value);
+      // UpdateARM64SVERegistersInfos and UpdateARM64SMERegistersInfos
+      // expect the number of 8-byte granules; darwin provides number of
+      // bytes.
+      if (reg_value != fail_value && reg_value <= 256) {
+        svg_reg_value = reg_value / 8;
+        // Apple hardware only implements Streaming SVE mode, so
+        // the non-streaming Vector Length is not reported by the
+        // kernel. Set both svg and vg to this svl value.
+        if (!vg_reg_value)
+          vg_reg_value = reg_value / 8;
+      }
+    }
+  }
 
   if (!vg_reg_value && !svg_reg_value)
     return false;
diff --git a/lldb/test/API/commands/register/register/register_command/TestRegisters.py b/lldb/test/API/commands/register/register/register_command/TestRegisters.py
index bfd7a382064e9..0b80a09534371 100644
--- a/lldb/test/API/commands/register/register/register_command/TestRegisters.py
+++ b/lldb/test/API/commands/register/register/register_command/TestRegisters.py
@@ -21,6 +21,24 @@ def tearDown(self):
         self.dbg.GetSelectedTarget().GetProcess().Destroy()
         TestBase.tearDown(self)
 
+    # on macOS, detect if the current machine is arm64 and supports SME
+    def get_sme_available(self):
+        if self.getArchitecture() != "arm64":
+            return None
+        try:
+            sysctl_output = subprocess.check_output(
+                ["sysctl", "hw.optional.arm.FEAT_SME"]
+            ).decode("utf-8")
+        except (OSError, subprocess.CalledProcessError):
+            return None
+        m = re.match(r"hw\.optional\.arm\.FEAT_SME: (\w+)", sysctl_output)
+        if m:
+            if int(m.group(1)) == 1:
+                return True
+            else:
+                return False
+        return None
+
     @skipIfiOSSimulator
     @skipIf(archs=no_match(["amd64", "arm", "i386", "x86_64"]))
     @expectedFailureAll(oslist=["freebsd", "netbsd"], bugnumber="llvm.org/pr48371")
@@ -32,11 +50,19 @@ def test_register_commands(self):
         # verify that logging does not assert
         self.log_enable("registers")
 
+        error_str_matched = False
+        if self.platformIsDarwin() and self.get_sme_available():
+            # On Darwin AArch64 SME machines, we will have unavailable
+            # registers when not in Streaming SVE Mode/SME, so
+            # `register read -a` will report that some registers
+            # could not be read. This is expected.
+ error_str_matched = True + self.expect( "register read -a", MISSING_EXPECTED_REGISTERS, substrs=["registers were unavailable"], - matching=False, + matching=error_str_matched, ) all_registers = self.res.GetOutput() @@ -60,7 +86,7 @@ def test_register_commands(self): self.runCmd("register read q15") # may be available self.expect( - "register read -s 4", substrs=["invalid register set index: 4"], error=True + "register read -s 8", substrs=["invalid register set index: 8"], error=True ) @skipIfiOSSimulator diff --git a/lldb/test/API/macosx/sme-registers/Makefile b/lldb/test/API/macosx/sme-registers/Makefile new file mode 100644 index 0000000000000..d4173d262ed27 --- /dev/null +++ b/lldb/test/API/macosx/sme-registers/Makefile @@ -0,0 +1,5 @@ +C_SOURCES := main.c + +CFLAGS_EXTRAS := -mcpu=apple-m4 + +include Makefile.rules diff --git a/lldb/test/API/macosx/sme-registers/TestSMERegistersDarwin.py b/lldb/test/API/macosx/sme-registers/TestSMERegistersDarwin.py new file mode 100644 index 0000000000000..6f9d055cef506 --- /dev/null +++ b/lldb/test/API/macosx/sme-registers/TestSMERegistersDarwin.py @@ -0,0 +1,217 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil +import os + + +class TestSMERegistersDarwin(TestBase): + NO_DEBUG_INFO_TESTCASE = True + mydir = TestBase.compute_mydir(__file__) + + @skipIfRemote + @skipUnlessDarwin + @skipUnlessFeature("hw.optional.arm.FEAT_SME") + @skipUnlessFeature("hw.optional.arm.FEAT_SME2") + # thread_set_state/thread_get_state only avail in macOS 15.4+ + @skipIf(macos_version=["<", "15.4"]) + def test(self): + """Test that we can read the contents of the SME/SVE registers on Darwin""" + self.build() + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( + self, "break before sme", lldb.SBFileSpec("main.c") + ) + frame = thread.GetFrameAtIndex(0) + self.assertTrue(frame.IsValid()) + + self.assertTrue( + 
target.BreakpointCreateBySourceRegex( + "break while sme", lldb.SBFileSpec("main.c") + ).IsValid() + ) + self.assertTrue( + target.BreakpointCreateBySourceRegex( + "break after sme", lldb.SBFileSpec("main.c") + ).IsValid() + ) + + if self.TraceOn(): + self.runCmd("reg read -a") + + self.assertTrue(frame.register["svl"].GetError().Fail()) + self.assertTrue(frame.register["z0"].GetError().Fail()) + self.assertTrue(frame.register["p0"].GetError().Fail()) + self.assertTrue(frame.register["za"].GetError().Fail()) + self.assertTrue(frame.register["zt0"].GetError().Fail()) + + process.Continue() + frame = thread.GetFrameAtIndex(0) + self.assertEqual(thread.GetStopReason(), lldb.eStopReasonBreakpoint) + + # Now in SME enabled mode + self.assertTrue(frame.register["svl"].GetError().Success()) + self.assertTrue(frame.register["z0"].GetError().Success()) + self.assertTrue(frame.register["p0"].GetError().Success()) + self.assertTrue(frame.register["za"].GetError().Success()) + self.assertTrue(frame.register["zt0"].GetError().Success()) + + # SSVE and SME modes should be enabled (reflecting PSTATE.SM and PSTATE.ZA) + svcr = frame.register["svcr"] + self.assertEqual(svcr.GetValueAsUnsigned(), 3) + + svl_reg = frame.register["svl"] + svl = svl_reg.GetValueAsUnsigned() + + z0 = frame.register["z0"] + self.assertEqual(z0.GetNumChildren(), svl) + self.assertEqual(z0.GetChildAtIndex(0).GetValueAsUnsigned(), 0x1) + self.assertEqual(z0.GetChildAtIndex(svl - 1).GetValueAsUnsigned(), 0x1) + + z31 = frame.register["z31"] + self.assertEqual(z31.GetNumChildren(), svl) + self.assertEqual(z31.GetChildAtIndex(0).GetValueAsUnsigned(), 32) + self.assertEqual(z31.GetChildAtIndex(svl - 1).GetValueAsUnsigned(), 32) + + p0 = frame.register["p0"] + self.assertEqual(p0.GetNumChildren(), svl / 8) + self.assertEqual(p0.GetChildAtIndex(0).GetValueAsUnsigned(), 0xFF) + self.assertEqual( + p0.GetChildAtIndex(p0.GetNumChildren() - 1).GetValueAsUnsigned(), 0xFF + ) + + p15 = frame.register["p15"] + 
self.assertEqual(p15.GetNumChildren(), svl / 8) + self.assertEqual(p15.GetChildAtIndex(0).GetValueAsUnsigned(), 0xFF) + self.assertEqual( + p15.GetChildAtIndex(p15.GetNumChildren() - 1).GetValueAsUnsigned(), 0xFF + ) + + za = frame.register["za"] + self.assertEqual(za.GetNumChildren(), (svl * svl)) + za_0 = za.GetChildAtIndex(0) + self.assertEqual(za_0.GetValueAsUnsigned(), 4) + za_final = za.GetChildAtIndex(za.GetNumChildren() - 1) + self.assertEqual(za_final.GetValueAsUnsigned(), 67) + + zt0 = frame.register["zt0"] + self.assertEqual(zt0.GetNumChildren(), 64) + zt0_0 = zt0.GetChildAtIndex(0) + self.assertEqual(zt0_0.GetValueAsUnsigned(), 0) + zt0_final = zt0.GetChildAtIndex(63) + self.assertEqual(zt0_final.GetValueAsUnsigned(), 63) + + # Modify all of the registers, instruction step, confirm that the + # registers have the new values. Without the instruction step, it's + # possible debugserver or lldb could lie about the write succeeding. + + z0_old_values = [] + z0_new_values = [] + z0_new_str = '"{' + for i in range(svl): + z0_old_values.append(z0.GetChildAtIndex(i).GetValueAsUnsigned()) + z0_new_values.append(z0_old_values[i] + 5) + z0_new_str = z0_new_str + ("0x%02x " % z0_new_values[i]) + z0_new_str = z0_new_str + '}"' + self.runCmd("reg write z0 %s" % z0_new_str) + + z31_old_values = [] + z31_new_values = [] + z31_new_str = '"{' + for i in range(svl): + z31_old_values.append(z31.GetChildAtIndex(i).GetValueAsUnsigned()) + z31_new_values.append(z31_old_values[i] + 3) + z31_new_str = z31_new_str + ("0x%02x " % z31_new_values[i]) + z31_new_str = z31_new_str + '}"' + self.runCmd("reg write z31 %s" % z31_new_str) + + p0_old_values = [] + p0_new_values = [] + p0_new_str = '"{' + for i in range(int(svl / 8)): + p0_old_values.append(p0.GetChildAtIndex(i).GetValueAsUnsigned()) + p0_new_values.append(p0_old_values[i] - 5) + p0_new_str = p0_new_str + ("0x%02x " % p0_new_values[i]) + p0_new_str = p0_new_str + '}"' + self.runCmd("reg write p0 %s" % p0_new_str) + + 
p15_old_values = [] + p15_new_values = [] + p15_new_str = '"{' + for i in range(int(svl / 8)): + p15_old_values.append(p15.GetChildAtIndex(i).GetValueAsUnsigned()) + p15_new_values.append(p15_old_values[i] - 8) + p15_new_str = p15_new_str + ("0x%02x " % p15_new_values[i]) + p15_new_str = p15_new_str + '}"' + self.runCmd("reg write p15 %s" % p15_new_str) + + za_old_values = [] + za_new_values = [] + za_new_str = '"{' + for i in range(svl * svl): + za_old_values.append(za.GetChildAtIndex(i).GetValueAsUnsigned()) + za_new_values.append(za_old_values[i] + 7) + za_new_str = za_new_str + ("0x%02x " % za_new_values[i]) + za_new_str = za_new_str + '}"' + self.runCmd("reg write za %s" % za_new_str) + + zt0_old_values = [] + zt0_new_values = [] + zt0_new_str = '"{' + for i in range(64): + zt0_old_values.append(zt0.GetChildAtIndex(i).GetValueAsUnsigned()) + zt0_new_values.append(zt0_old_values[i] + 2) + zt0_new_str = zt0_new_str + ("0x%02x " % zt0_new_values[i]) + zt0_new_str = zt0_new_str + '}"' + self.runCmd("reg write zt0 %s" % zt0_new_str) + + thread.StepInstruction(False) + frame = thread.GetFrameAtIndex(0) + + if self.TraceOn(): + self.runCmd("reg read -a") + + z0 = frame.register["z0"] + for i in range(z0.GetNumChildren()): + self.assertEqual( + z0_new_values[i], z0.GetChildAtIndex(i).GetValueAsUnsigned() + ) + + z31 = frame.register["z31"] + for i in range(z31.GetNumChildren()): + self.assertEqual( + z31_new_values[i], z31.GetChildAtIndex(i).GetValueAsUnsigned() + ) + + p0 = frame.register["p0"] + for i in range(p0.GetNumChildren()): + self.assertEqual( + p0_new_values[i], p0.GetChildAtIndex(i).GetValueAsUnsigned() + ) + + p15 = frame.register["p15"] + for i in range(p15.GetNumChildren()): + self.assertEqual( + p15_new_values[i], p15.GetChildAtIndex(i).GetValueAsUnsigned() + ) + + za = frame.register["za"] + for i in range(za.GetNumChildren()): + self.assertEqual( + za_new_values[i], za.GetChildAtIndex(i).GetValueAsUnsigned() + ) + + zt0 = frame.register["zt0"] + for 
i in range(zt0.GetNumChildren()):
+            self.assertEqual(
+                zt0_new_values[i], zt0.GetChildAtIndex(i).GetValueAsUnsigned()
+            )
+
+        process.Continue()
+        frame = thread.GetFrameAtIndex(0)
+        self.assertEqual(thread.GetStopReason(), lldb.eStopReasonBreakpoint)
+
+        self.assertTrue(frame.register["svl"].GetError().Fail())
+        self.assertTrue(frame.register["z0"].GetError().Fail())
+        self.assertTrue(frame.register["p0"].GetError().Fail())
+        self.assertTrue(frame.register["za"].GetError().Fail())
+        self.assertTrue(frame.register["zt0"].GetError().Fail())
diff --git a/lldb/test/API/macosx/sme-registers/main.c b/lldb/test/API/macosx/sme-registers/main.c
new file mode 100644
index 0000000000000..2ebddfc001f8f
--- /dev/null
+++ b/lldb/test/API/macosx/sme-registers/main.c
@@ -0,0 +1,113 @@
+/// BUILT with
+/// xcrun -sdk macosx.internal clang -mcpu=apple-m4 -g sme.c -o sme
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void write_sve_regs() {
+  asm volatile("ptrue p0.b\n\t");
+  asm volatile("ptrue p1.h\n\t");
+  asm volatile("ptrue p2.s\n\t");
+  asm volatile("ptrue p3.d\n\t");
+  asm volatile("pfalse p4.b\n\t");
+  asm volatile("ptrue p5.b\n\t");
+  asm volatile("ptrue p6.h\n\t");
+  asm volatile("ptrue p7.s\n\t");
+  asm volatile("ptrue p8.d\n\t");
+  asm volatile("pfalse p9.b\n\t");
+  asm volatile("ptrue p10.b\n\t");
+  asm volatile("ptrue p11.h\n\t");
+  asm volatile("ptrue p12.s\n\t");
+  asm volatile("ptrue p13.d\n\t");
+  asm volatile("pfalse p14.b\n\t");
+  asm volatile("ptrue p15.b\n\t");
+
+  asm volatile("cpy z0.b, p0/z, #1\n\t");
+  asm volatile("cpy z1.b, p5/z, #2\n\t");
+  asm volatile("cpy z2.b, p10/z, #3\n\t");
+  asm volatile("cpy z3.b, p15/z, #4\n\t");
+  asm volatile("cpy z4.b, p0/z, #5\n\t");
+  asm volatile("cpy z5.b, p5/z, #6\n\t");
+  asm volatile("cpy z6.b, p10/z, #7\n\t");
+  asm volatile("cpy z7.b, p15/z, #8\n\t");
+  asm volatile("cpy z8.b, p0/z, #9\n\t");
+  asm volatile("cpy z9.b, p5/z, #10\n\t");
+  asm volatile("cpy z10.b, p10/z, #11\n\t");
+  asm volatile("cpy z11.b, p15/z, #12\n\t");
+  
asm volatile("cpy z12.b, p0/z, #13\n\t"); + asm volatile("cpy z13.b, p5/z, #14\n\t"); + asm volatile("cpy z14.b, p10/z, #15\n\t"); + asm volatile("cpy z15.b, p15/z, #16\n\t"); + asm volatile("cpy z16.b, p0/z, #17\n\t"); + asm volatile("cpy z17.b, p5/z, #18\n\t"); + asm volatile("cpy z18.b, p10/z, #19\n\t"); + asm volatile("cpy z19.b, p15/z, #20\n\t"); + asm volatile("cpy z20.b, p0/z, #21\n\t"); + asm volatile("cpy z21.b, p5/z, #22\n\t"); + asm volatile("cpy z22.b, p10/z, #23\n\t"); + asm volatile("cpy z23.b, p15/z, #24\n\t"); + asm volatile("cpy z24.b, p0/z, #25\n\t"); + asm volatile("cpy z25.b, p5/z, #26\n\t"); + asm volatile("cpy z26.b, p10/z, #27\n\t"); + asm volatile("cpy z27.b, p15/z, #28\n\t"); + asm volatile("cpy z28.b, p0/z, #29\n\t"); + asm volatile("cpy z29.b, p5/z, #30\n\t"); + asm volatile("cpy z30.b, p10/z, #31\n\t"); + asm volatile("cpy z31.b, p15/z, #32\n\t"); +} + +#define MAX_VL_BYTES 256 +void set_za_register(int svl, int value_offset) { + uint8_t data[MAX_VL_BYTES]; + + // ldr za will actually wrap the selected vector row, by the number of rows + // you have. So setting one that didn't exist would actually set one that did. + // That's why we need the streaming vector length here. + for (int i = 0; i < svl; ++i) { + // This may involve instructions that require the smefa64 extension. + for (int j = 0; j < MAX_VL_BYTES; j++) + data[j] = i + value_offset; + // Each one of these loads a VL sized row of ZA. 
+    asm volatile("mov w12, %w0\n\t"
+                 "ldr za[w12, 0], [%1]\n\t" ::"r"(i),
+                 "r"(&data)
+                 : "w12");
+  }
+}
+
+static uint16_t arm_sme_svl_b(void) {
+  uint64_t ret = 0;
+  asm volatile("rdsvl %[ret], #1" : [ret] "=r"(ret));
+  return (uint16_t)ret;
+}
+
+void arm_sme2_set_zt0() {
+#define ZT0_LEN (512 / 8)
+  uint8_t data[ZT0_LEN];
+  for (unsigned i = 0; i < ZT0_LEN; ++i)
+    data[i] = i + 0;
+
+  asm volatile("ldr zt0, [%0]" ::"r"(&data));
+#undef ZT0_LEN
+}
+
+int main() {
+  printf("Enable SME mode\n"); // break before sme
+
+  asm volatile("smstart");
+
+  write_sve_regs();
+
+  set_za_register(arm_sme_svl_b(), 4);
+
+  arm_sme2_set_zt0();
+
+  int c = 10; // break while sme
+  c += 5;
+  c += 5;
+
+  asm volatile("smstop");
+
+  printf("SME mode disabled\n"); // break after sme
+}
diff --git a/lldb/tools/debugserver/source/DNBDefs.h b/lldb/tools/debugserver/source/DNBDefs.h
index dacee652b3ebf..df8ca809d412c 100644
--- a/lldb/tools/debugserver/source/DNBDefs.h
+++ b/lldb/tools/debugserver/source/DNBDefs.h
@@ -312,16 +312,21 @@ struct DNBRegisterValue {
     uint64_t uint64;
     float float32;
     double float64;
-    int8_t v_sint8[64];
-    int16_t v_sint16[32];
-    int32_t v_sint32[16];
-    int64_t v_sint64[8];
-    uint8_t v_uint8[64];
-    uint16_t v_uint16[32];
-    uint32_t v_uint32[16];
-    uint64_t v_uint64[8];
-    float v_float32[16];
-    double v_float64[8];
+    // AArch64 SME's ZA register max size is 64k, this object must be
+    // large enough to hold that much data. The current Apple cores
+    // have a much smaller maximum ZA reg size, but there are not
+    // multiple copies of this object so increase the static size to
+    // maximum possible.
+    int8_t v_sint8[65536];
+    int16_t v_sint16[32768];
+    int32_t v_sint32[16384];
+    int64_t v_sint64[8192];
+    uint8_t v_uint8[65536];
+    uint16_t v_uint16[32768];
+    uint32_t v_uint32[16384];
+    uint64_t v_uint64[8192];
+    float v_float32[16384];
+    double v_float64[8192];
     void *pointer;
     char *c_str;
   } value;
diff --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
index 530c5b24b424e..a2179bf2f91e5 100644
--- a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
+++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
@@ -1417,15 +1417,17 @@ static uint64_t bits(uint64_t value, uint32_t msbit, uint32_t lsbit) {
         continue;
       for (uint32_t reg = 0; reg < reg_sets[set].num_registers; ++reg) {
         if (strcmp(reg_sets[set].registers[reg].name, "esr") == 0) {
-          DNBRegisterValue reg_value;
-          if (GetRegisterValue(tid, set, reg, &reg_value)) {
-            esr = reg_value.value.uint64;
+          std::unique_ptr<DNBRegisterValue> reg_value =
+              std::make_unique<DNBRegisterValue>();
+          if (GetRegisterValue(tid, set, reg, reg_value.get())) {
+            esr = reg_value->value.uint64;
           }
         }
         if (strcmp(reg_sets[set].registers[reg].name, "far") == 0) {
-          DNBRegisterValue reg_value;
-          if (GetRegisterValue(tid, set, reg, &reg_value)) {
-            far = reg_value.value.uint64;
+          std::unique_ptr<DNBRegisterValue> reg_value =
+              std::make_unique<DNBRegisterValue>();
+          if (GetRegisterValue(tid, set, reg, reg_value.get())) {
+            far = reg_value->value.uint64;
           }
         }
       }
diff --git a/lldb/tools/debugserver/source/MacOSX/MachThread.cpp b/lldb/tools/debugserver/source/MacOSX/MachThread.cpp
index de2bebfcec709..69e1c9bb0e252 100644
--- a/lldb/tools/debugserver/source/MacOSX/MachThread.cpp
+++ b/lldb/tools/debugserver/source/MacOSX/MachThread.cpp
@@ -509,10 +509,12 @@ void MachThread::DumpRegisterState(nub_size_t regSet) {
     if (m_arch_up->RegisterSetStateIsValid((int)regSet)) {
       const size_t numRegisters = GetNumRegistersInSet(regSet);
       uint32_t regIndex = 0;
-      DNBRegisterValueClass reg;
+      std::unique_ptr<DNBRegisterValueClass> reg =
+          std::make_unique<DNBRegisterValueClass>();
       for (regIndex = 0; regIndex < numRegisters; ++regIndex) {
-        if (m_arch_up->GetRegisterValue((uint32_t)regSet, regIndex, &reg)) {
-          reg.Dump(NULL, NULL);
+        if (m_arch_up->GetRegisterValue((uint32_t)regSet, regIndex,
+                                        reg.get())) {
+          reg->Dump(NULL, NULL);
         }
       }
     } else {
diff --git a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp
index b6f52cb5cf496..34a4ee21f8502 100644
--- a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp
+++ b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp
@@ -93,6 +93,45 @@ DNBArchMachARM64::SoftwareBreakpointOpcode(nub_size_t byte_size) {
 
 uint32_t DNBArchMachARM64::GetCPUType() { return CPU_TYPE_ARM64; }
 
+static std::once_flag g_cpu_has_sme_once;
+bool DNBArchMachARM64::CPUHasSME() {
+  static bool g_has_sme = false;
+  std::call_once(g_cpu_has_sme_once, []() {
+    int ret = 0;
+    size_t size = sizeof(ret);
+    if (sysctlbyname("hw.optional.arm.FEAT_SME", &ret, &size, NULL, 0) != -1)
+      g_has_sme = ret == 1;
+  });
+  return g_has_sme;
+}
+
+static std::once_flag g_cpu_has_sme2_once;
+bool DNBArchMachARM64::CPUHasSME2() {
+  static bool g_has_sme2 = false;
+  std::call_once(g_cpu_has_sme2_once, []() {
+    int ret = 0;
+    size_t size = sizeof(ret);
+    if (sysctlbyname("hw.optional.arm.FEAT_SME2", &ret, &size, NULL, 0) != -1)
+      g_has_sme2 = ret == 1;
+  });
+  return g_has_sme2;
+}
+
+static std::once_flag g_sme_max_svl_once;
+unsigned int DNBArchMachARM64::GetSMEMaxSVL() {
+  static unsigned int g_sme_max_svl = 0;
+  std::call_once(g_sme_max_svl_once, []() {
+    if (CPUHasSME()) {
+      unsigned int ret = 0;
+      size_t size = sizeof(ret);
+      if (sysctlbyname("hw.optional.arm.sme_max_svl_b", &ret, &size, NULL, 0) !=
+          -1)
+        g_sme_max_svl = ret;
+    }
+  });
+  return g_sme_max_svl;
+}
+
 static uint64_t clear_pac_bits(uint64_t value) {
   uint32_t addressing_bits = 0;
   if (!DNBGetAddressingBits(addressing_bits))
@@ -415,6 +454,118 @@ kern_return_t DNBArchMachARM64::GetDBGState(bool force) {
   return kret;
}
 
+kern_return_t DNBArchMachARM64::GetSVEState(bool force) {
+  int set = e_regSetSVE;
+  // Check if we have valid cached registers
+  if (!force && m_state.GetError(set, Read) == KERN_SUCCESS)
+    return KERN_SUCCESS;
+
+  if (!CPUHasSME())
+    return KERN_INVALID_ARGUMENT;
+
+  // If the processor is not in Streaming SVE Mode, these thread_get_states
+  // will fail; zero the buffers first so stale data is never returned.
+  memset(&m_state.context.sve.z[0], 0,
+         ARM_SVE_Z_STATE_COUNT * sizeof(uint32_t));
+  memset(&m_state.context.sve.z[16], 0,
+         ARM_SVE_Z_STATE_COUNT * sizeof(uint32_t));
+  memset(&m_state.context.sve.p[0], 0,
+         ARM_SVE_P_STATE_COUNT * sizeof(uint32_t));
+
+  // Read the registers from our thread
+  mach_msg_type_number_t count = ARM_SVE_Z_STATE_COUNT;
+  kern_return_t kret =
+      ::thread_get_state(m_thread->MachPortNumber(), ARM_SVE_Z_STATE1,
+                         (thread_state_t)&m_state.context.sve.z[0], &count);
+  m_state.SetError(set, Read, kret);
+  DNBLogThreadedIf(LOG_THREAD, "Read SVE registers z0..z15 return value %d",
+                   kret);
+  if (kret != KERN_SUCCESS)
+    return kret;
+
+  count = ARM_SVE_Z_STATE_COUNT;
+  kret = thread_get_state(m_thread->MachPortNumber(), ARM_SVE_Z_STATE2,
+                          (thread_state_t)&m_state.context.sve.z[16], &count);
+  m_state.SetError(set, Read, kret);
+  DNBLogThreadedIf(LOG_THREAD, "Read SVE registers z16..z31 return value %d",
+                   kret);
+  if (kret != KERN_SUCCESS)
+    return kret;
+
+  count = ARM_SVE_P_STATE_COUNT;
+  kret = thread_get_state(m_thread->MachPortNumber(), ARM_SVE_P_STATE,
+                          (thread_state_t)&m_state.context.sve.p[0], &count);
+  m_state.SetError(set, Read, kret);
+  DNBLogThreadedIf(LOG_THREAD, "Read SVE registers p0..p15 return value %d",
+                   kret);
+
+  return kret;
+}
+
+kern_return_t DNBArchMachARM64::GetSMEState(bool force) {
+  int set = e_regSetSME;
+  // Check if we have valid cached registers
+  if (!force && m_state.GetError(set, Read) == KERN_SUCCESS)
+    return KERN_SUCCESS;
+
+  if (!CPUHasSME())
+    return KERN_INVALID_ARGUMENT;
+
+  // If the processor is not in Streaming SVE Mode, these thread_get_states
+  // will fail; zero the buffers first so stale data is never returned.
+  memset(&m_state.context.sme.svcr, 0, ARM_SME_STATE_COUNT * sizeof(uint32_t));
+  memset(m_state.context.sme.za.data(), 0, m_state.context.sme.za.size());
+  if (CPUHasSME2())
+    memset(&m_state.context.sme.zt0, 0,
+           ARM_SME2_STATE_COUNT * sizeof(uint32_t));
+
+  // Read the registers from our thread
+  mach_msg_type_number_t count = ARM_SME_STATE_COUNT;
+  kern_return_t kret =
+      ::thread_get_state(m_thread->MachPortNumber(), ARM_SME_STATE,
+                         (thread_state_t)&m_state.context.sme.svcr, &count);
+  m_state.SetError(set, Read, kret);
+  DNBLogThreadedIf(LOG_THREAD, "Read ARM_SME_STATE return value %d", kret);
+  if (kret != KERN_SUCCESS)
+    return kret;
+
+  size_t za_size = m_state.context.sme.svl_b * m_state.context.sme.svl_b;
+  const size_t max_chunk_size = 4096;
+  int n_chunks;
+  size_t chunk_size;
+  if (za_size <= max_chunk_size) {
+    n_chunks = 1;
+    chunk_size = za_size;
+  } else {
+    n_chunks = za_size / max_chunk_size;
+    chunk_size = max_chunk_size;
+  }
+  for (int i = 0; i < n_chunks; i++) {
+    count = ARM_SME_ZA_STATE_COUNT;
+    arm_sme_za_state_t za_state;
+    kret = thread_get_state(m_thread->MachPortNumber(), ARM_SME_ZA_STATE1 + i,
+                            (thread_state_t)&za_state, &count);
+    m_state.SetError(set, Read, kret);
+    DNBLogThreadedIf(LOG_THREAD, "Read SME ZA chunk return value %d", kret);
+    if (kret != KERN_SUCCESS)
+      return kret;
+    memcpy(m_state.context.sme.za.data() + (i * chunk_size), &za_state,
+           chunk_size);
+  }
+
+  if (CPUHasSME2()) {
+    count = ARM_SME2_STATE_COUNT;
+    kret = thread_get_state(m_thread->MachPortNumber(), ARM_SME2_STATE,
+                            (thread_state_t)&m_state.context.sme.zt0, &count);
+    m_state.SetError(set, Read, kret);
+    DNBLogThreadedIf(LOG_THREAD, "Read ARM_SME2_STATE return value %d", kret);
+    if (kret != KERN_SUCCESS)
+      return kret;
+  }
+
+  return kret;
+}
+
 kern_return_t DNBArchMachARM64::SetGPRState() {
   int set = 
e_regSetGPR;
   kern_return_t kret = ::thread_set_state(
@@ -441,6 +592,80 @@ kern_return_t DNBArchMachARM64::SetVFPState() {
   return kret; // Return the error code
 }
 
+kern_return_t DNBArchMachARM64::SetSVEState() {
+  if (!CPUHasSME())
+    return KERN_INVALID_ARGUMENT;
+
+  int set = e_regSetSVE;
+  kern_return_t kret = thread_set_state(
+      m_thread->MachPortNumber(), ARM_SVE_Z_STATE1,
+      (thread_state_t)&m_state.context.sve.z[0], ARM_SVE_Z_STATE_COUNT);
+  m_state.SetError(set, Write, kret);
+  DNBLogThreadedIf(LOG_THREAD, "Write ARM_SVE_Z_STATE1 return value %d", kret);
+  if (kret != KERN_SUCCESS)
+    return kret;
+
+  kret = thread_set_state(m_thread->MachPortNumber(), ARM_SVE_Z_STATE2,
+                          (thread_state_t)&m_state.context.sve.z[16],
+                          ARM_SVE_Z_STATE_COUNT);
+  m_state.SetError(set, Write, kret);
+  DNBLogThreadedIf(LOG_THREAD, "Write ARM_SVE_Z_STATE2 return value %d", kret);
+  if (kret != KERN_SUCCESS)
+    return kret;
+
+  kret = thread_set_state(m_thread->MachPortNumber(), ARM_SVE_P_STATE,
+                          (thread_state_t)&m_state.context.sve.p[0],
+                          ARM_SVE_P_STATE_COUNT);
+  m_state.SetError(set, Write, kret);
+  DNBLogThreadedIf(LOG_THREAD, "Write ARM_SVE_P_STATE return value %d", kret);
+  if (kret != KERN_SUCCESS)
+    return kret;
+
+  return kret;
+}
+
+kern_return_t DNBArchMachARM64::SetSMEState() {
+  if (!CPUHasSME())
+    return KERN_INVALID_ARGUMENT;
+  kern_return_t kret = KERN_SUCCESS;
+
+  int set = e_regSetSME;
+  size_t za_size = m_state.context.sme.svl_b * m_state.context.sme.svl_b;
+  const size_t max_chunk_size = 4096;
+  int n_chunks;
+  size_t chunk_size;
+  if (za_size <= max_chunk_size) {
+    n_chunks = 1;
+    chunk_size = za_size;
+  } else {
+    n_chunks = za_size / max_chunk_size;
+    chunk_size = max_chunk_size;
+  }
+  for (int i = 0; i < n_chunks; i++) {
+    arm_sme_za_state_t za_state;
+    memcpy(&za_state, m_state.context.sme.za.data() + (i * chunk_size),
+           chunk_size);
+    kret = thread_set_state(m_thread->MachPortNumber(), ARM_SME_ZA_STATE1 + i,
+                            (thread_state_t)&za_state, ARM_SME_ZA_STATE_COUNT);
+    m_state.SetError(set, Write, kret);
+    DNBLogThreadedIf(LOG_THREAD, "Write SME ZA chunk return value %d", kret);
+    if (kret != KERN_SUCCESS)
+      return kret;
+  }
+
+  if (CPUHasSME2()) {
+    kret = thread_set_state(m_thread->MachPortNumber(), ARM_SME2_STATE,
+                            (thread_state_t)&m_state.context.sme.zt0,
+                            ARM_SME2_STATE_COUNT);
+    m_state.SetError(set, Write, kret);
+    DNBLogThreadedIf(LOG_THREAD, "Write ARM_SME2_STATE return value %d", kret);
+    if (kret != KERN_SUCCESS)
+      return kret;
+  }
+
+  return kret;
+}
+
 kern_return_t DNBArchMachARM64::SetEXCState() {
   int set = e_regSetEXC;
   kern_return_t kret = ::thread_set_state(
@@ -1531,6 +1756,59 @@ enum {
   vfp_d31
 };
 
+enum {
+  sve_z0,
+  sve_z1,
+  sve_z2,
+  sve_z3,
+  sve_z4,
+  sve_z5,
+  sve_z6,
+  sve_z7,
+  sve_z8,
+  sve_z9,
+  sve_z10,
+  sve_z11,
+  sve_z12,
+  sve_z13,
+  sve_z14,
+  sve_z15,
+  sve_z16,
+  sve_z17,
+  sve_z18,
+  sve_z19,
+  sve_z20,
+  sve_z21,
+  sve_z22,
+  sve_z23,
+  sve_z24,
+  sve_z25,
+  sve_z26,
+  sve_z27,
+  sve_z28,
+  sve_z29,
+  sve_z30,
+  sve_z31,
+  sve_p0,
+  sve_p1,
+  sve_p2,
+  sve_p3,
+  sve_p4,
+  sve_p5,
+  sve_p6,
+  sve_p7,
+  sve_p8,
+  sve_p9,
+  sve_p10,
+  sve_p11,
+  sve_p12,
+  sve_p13,
+  sve_p14,
+  sve_p15
+};
+
+enum { sme_svcr, sme_tpidr2, sme_svl_b, sme_za, sme_zt0 };
+
 enum { exc_far = 0, exc_esr, exc_exception };
 
 // These numbers from the "DWARF for the ARM 64-bit Architecture (AArch64)"
@@ -1681,7 +1959,60 @@ enum {
   debugserver_vfp_v30,
   debugserver_vfp_v31,
   debugserver_vfp_fpsr,
-  debugserver_vfp_fpcr
+  debugserver_vfp_fpcr,
+  debugserver_sve_z0,
+  debugserver_sve_z1,
+  debugserver_sve_z2,
+  debugserver_sve_z3,
+  debugserver_sve_z4,
+  debugserver_sve_z5,
+  debugserver_sve_z6,
+  debugserver_sve_z7,
+  debugserver_sve_z8,
+  debugserver_sve_z9,
+  debugserver_sve_z10,
+  debugserver_sve_z11,
+  debugserver_sve_z12,
+  debugserver_sve_z13,
+  debugserver_sve_z14,
+  debugserver_sve_z15,
+  debugserver_sve_z16,
+  debugserver_sve_z17,
+  debugserver_sve_z18,
+  debugserver_sve_z19,
+  debugserver_sve_z20,
+  debugserver_sve_z21,
+  
debugserver_sve_z22, + debugserver_sve_z23, + debugserver_sve_z24, + debugserver_sve_z25, + debugserver_sve_z26, + debugserver_sve_z27, + debugserver_sve_z28, + debugserver_sve_z29, + debugserver_sve_z30, + debugserver_sve_z31, + debugserver_sve_p0, + debugserver_sve_p1, + debugserver_sve_p2, + debugserver_sve_p3, + debugserver_sve_p4, + debugserver_sve_p5, + debugserver_sve_p6, + debugserver_sve_p7, + debugserver_sve_p8, + debugserver_sve_p9, + debugserver_sve_p10, + debugserver_sve_p11, + debugserver_sve_p12, + debugserver_sve_p13, + debugserver_sve_p14, + debugserver_sve_p15, + debugserver_sme_svcr, + debugserver_sme_tpidr2, + debugserver_sme_svl_b, + debugserver_sme_za, + debugserver_sme_zt0 }; const char *g_contained_x0[]{"x0", NULL}; @@ -1906,38 +2237,74 @@ const char *g_contained_v29[]{"v29", NULL}; const char *g_contained_v30[]{"v30", NULL}; const char *g_contained_v31[]{"v31", NULL}; -const char *g_invalidate_v0[]{"v0", "d0", "s0", NULL}; -const char *g_invalidate_v1[]{"v1", "d1", "s1", NULL}; -const char *g_invalidate_v2[]{"v2", "d2", "s2", NULL}; -const char *g_invalidate_v3[]{"v3", "d3", "s3", NULL}; -const char *g_invalidate_v4[]{"v4", "d4", "s4", NULL}; -const char *g_invalidate_v5[]{"v5", "d5", "s5", NULL}; -const char *g_invalidate_v6[]{"v6", "d6", "s6", NULL}; -const char *g_invalidate_v7[]{"v7", "d7", "s7", NULL}; -const char *g_invalidate_v8[]{"v8", "d8", "s8", NULL}; -const char *g_invalidate_v9[]{"v9", "d9", "s9", NULL}; -const char *g_invalidate_v10[]{"v10", "d10", "s10", NULL}; -const char *g_invalidate_v11[]{"v11", "d11", "s11", NULL}; -const char *g_invalidate_v12[]{"v12", "d12", "s12", NULL}; -const char *g_invalidate_v13[]{"v13", "d13", "s13", NULL}; -const char *g_invalidate_v14[]{"v14", "d14", "s14", NULL}; -const char *g_invalidate_v15[]{"v15", "d15", "s15", NULL}; -const char *g_invalidate_v16[]{"v16", "d16", "s16", NULL}; -const char *g_invalidate_v17[]{"v17", "d17", "s17", NULL}; -const char *g_invalidate_v18[]{"v18", "d18", "s18", 
NULL}; -const char *g_invalidate_v19[]{"v19", "d19", "s19", NULL}; -const char *g_invalidate_v20[]{"v20", "d20", "s20", NULL}; -const char *g_invalidate_v21[]{"v21", "d21", "s21", NULL}; -const char *g_invalidate_v22[]{"v22", "d22", "s22", NULL}; -const char *g_invalidate_v23[]{"v23", "d23", "s23", NULL}; -const char *g_invalidate_v24[]{"v24", "d24", "s24", NULL}; -const char *g_invalidate_v25[]{"v25", "d25", "s25", NULL}; -const char *g_invalidate_v26[]{"v26", "d26", "s26", NULL}; -const char *g_invalidate_v27[]{"v27", "d27", "s27", NULL}; -const char *g_invalidate_v28[]{"v28", "d28", "s28", NULL}; -const char *g_invalidate_v29[]{"v29", "d29", "s29", NULL}; -const char *g_invalidate_v30[]{"v30", "d30", "s30", NULL}; -const char *g_invalidate_v31[]{"v31", "d31", "s31", NULL}; +const char *g_invalidate_v[32][4]{ + {"v0", "d0", "s0", NULL}, {"v1", "d1", "s1", NULL}, + {"v2", "d2", "s2", NULL}, {"v3", "d3", "s3", NULL}, + {"v4", "d4", "s4", NULL}, {"v5", "d5", "s5", NULL}, + {"v6", "d6", "s6", NULL}, {"v7", "d7", "s7", NULL}, + {"v8", "d8", "s8", NULL}, {"v9", "d9", "s9", NULL}, + {"v10", "d10", "s10", NULL}, {"v11", "d11", "s11", NULL}, + {"v12", "d12", "s12", NULL}, {"v13", "d13", "s13", NULL}, + {"v14", "d14", "s14", NULL}, {"v15", "d15", "s15", NULL}, + {"v16", "d16", "s16", NULL}, {"v17", "d17", "s17", NULL}, + {"v18", "d18", "s18", NULL}, {"v19", "d19", "s19", NULL}, + {"v20", "d20", "s20", NULL}, {"v21", "d21", "s21", NULL}, + {"v22", "d22", "s22", NULL}, {"v23", "d23", "s23", NULL}, + {"v24", "d24", "s24", NULL}, {"v25", "d25", "s25", NULL}, + {"v26", "d26", "s26", NULL}, {"v27", "d27", "s27", NULL}, + {"v28", "d28", "s28", NULL}, {"v29", "d29", "s29", NULL}, + {"v30", "d30", "s30", NULL}, {"v31", "d31", "s31", NULL}}; + +const char *g_invalidate_z[32][5]{ + {"z0", "v0", "d0", "s0", NULL}, {"z1", "v1", "d1", "s1", NULL}, + {"z2", "v2", "d2", "s2", NULL}, {"z3", "v3", "d3", "s3", NULL}, + {"z4", "v4", "d4", "s4", NULL}, {"z5", "v5", "d5", "s5", NULL}, + {"z6", 
"v6", "d6", "s6", NULL}, {"z7", "v7", "d7", "s7", NULL}, + {"z8", "v8", "d8", "s8", NULL}, {"z9", "v9", "d9", "s9", NULL}, + {"z10", "v10", "d10", "s10", NULL}, {"z11", "v11", "d11", "s11", NULL}, + {"z12", "v12", "d12", "s12", NULL}, {"z13", "v13", "d13", "s13", NULL}, + {"z14", "v14", "d14", "s14", NULL}, {"z15", "v15", "d15", "s15", NULL}, + {"z16", "v16", "d16", "s16", NULL}, {"z17", "v17", "d17", "s17", NULL}, + {"z18", "v18", "d18", "s18", NULL}, {"z19", "v19", "d19", "s19", NULL}, + {"z20", "v20", "d20", "s20", NULL}, {"z21", "v21", "d21", "s21", NULL}, + {"z22", "v22", "d22", "s22", NULL}, {"z23", "v23", "d23", "s23", NULL}, + {"z24", "v24", "d24", "s24", NULL}, {"z25", "v25", "d25", "s25", NULL}, + {"z26", "v26", "d26", "s26", NULL}, {"z27", "v27", "d27", "s27", NULL}, + {"z28", "v28", "d28", "s28", NULL}, {"z29", "v29", "d29", "s29", NULL}, + {"z30", "v30", "d30", "s30", NULL}, {"z31", "v31", "d31", "s31", NULL}}; + +const char *g_contained_z0[]{"z0", NULL}; +const char *g_contained_z1[]{"z1", NULL}; +const char *g_contained_z2[]{"z2", NULL}; +const char *g_contained_z3[]{"z3", NULL}; +const char *g_contained_z4[]{"z4", NULL}; +const char *g_contained_z5[]{"z5", NULL}; +const char *g_contained_z6[]{"z6", NULL}; +const char *g_contained_z7[]{"z7", NULL}; +const char *g_contained_z8[]{"z8", NULL}; +const char *g_contained_z9[]{"z9", NULL}; +const char *g_contained_z10[]{"z10", NULL}; +const char *g_contained_z11[]{"z11", NULL}; +const char *g_contained_z12[]{"z12", NULL}; +const char *g_contained_z13[]{"z13", NULL}; +const char *g_contained_z14[]{"z14", NULL}; +const char *g_contained_z15[]{"z15", NULL}; +const char *g_contained_z16[]{"z16", NULL}; +const char *g_contained_z17[]{"z17", NULL}; +const char *g_contained_z18[]{"z18", NULL}; +const char *g_contained_z19[]{"z19", NULL}; +const char *g_contained_z20[]{"z20", NULL}; +const char *g_contained_z21[]{"z21", NULL}; +const char *g_contained_z22[]{"z22", NULL}; +const char *g_contained_z23[]{"z23", NULL}; 
+const char *g_contained_z24[]{"z24", NULL}; +const char *g_contained_z25[]{"z25", NULL}; +const char *g_contained_z26[]{"z26", NULL}; +const char *g_contained_z27[]{"z27", NULL}; +const char *g_contained_z28[]{"z28", NULL}; +const char *g_contained_z29[]{"z29", NULL}; +const char *g_contained_z30[]{"z30", NULL}; +const char *g_contained_z31[]{"z31", NULL}; #if defined(__arm64__) || defined(__aarch64__) #define VFP_V_OFFSET_IDX(idx) \ @@ -1948,141 +2315,18 @@ const char *g_invalidate_v31[]{"v31", "d31", "s31", NULL}; (offsetof(DNBArchMachARM64::FPU, opaque) + (idx * 16) + \ offsetof(DNBArchMachARM64::Context, vfp)) #endif -#define VFP_OFFSET_NAME(reg) \ - (offsetof(DNBArchMachARM64::FPU, reg) + \ - offsetof(DNBArchMachARM64::Context, vfp)) #define EXC_OFFSET(reg) \ (offsetof(DNBArchMachARM64::EXC, reg) + \ offsetof(DNBArchMachARM64::Context, exc)) - -//#define FLOAT_FORMAT Float -#define DEFINE_VFP_V_IDX(idx) \ - { \ - e_regSetVFP, vfp_v##idx, "v" #idx, "q" #idx, Vector, VectorOfUInt8, 16, \ - VFP_V_OFFSET_IDX(idx), INVALID_NUB_REGNUM, dwarf_v##idx, \ - INVALID_NUB_REGNUM, debugserver_vfp_v##idx, NULL, g_invalidate_v##idx \ - } -#define DEFINE_PSEUDO_VFP_S_IDX(idx) \ - { \ - e_regSetVFP, vfp_s##idx, "s" #idx, NULL, IEEE754, Float, 4, 0, \ - INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, \ - INVALID_NUB_REGNUM, g_contained_v##idx, g_invalidate_v##idx \ - } -#define DEFINE_PSEUDO_VFP_D_IDX(idx) \ - { \ - e_regSetVFP, vfp_d##idx, "d" #idx, NULL, IEEE754, Float, 8, 0, \ - INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, \ - INVALID_NUB_REGNUM, g_contained_v##idx, g_invalidate_v##idx \ - } - -// Floating point registers -const DNBRegisterInfo DNBArchMachARM64::g_vfp_registers[] = { - DEFINE_VFP_V_IDX(0), - DEFINE_VFP_V_IDX(1), - DEFINE_VFP_V_IDX(2), - DEFINE_VFP_V_IDX(3), - DEFINE_VFP_V_IDX(4), - DEFINE_VFP_V_IDX(5), - DEFINE_VFP_V_IDX(6), - DEFINE_VFP_V_IDX(7), - DEFINE_VFP_V_IDX(8), - DEFINE_VFP_V_IDX(9), - DEFINE_VFP_V_IDX(10), - 
DEFINE_VFP_V_IDX(11), - DEFINE_VFP_V_IDX(12), - DEFINE_VFP_V_IDX(13), - DEFINE_VFP_V_IDX(14), - DEFINE_VFP_V_IDX(15), - DEFINE_VFP_V_IDX(16), - DEFINE_VFP_V_IDX(17), - DEFINE_VFP_V_IDX(18), - DEFINE_VFP_V_IDX(19), - DEFINE_VFP_V_IDX(20), - DEFINE_VFP_V_IDX(21), - DEFINE_VFP_V_IDX(22), - DEFINE_VFP_V_IDX(23), - DEFINE_VFP_V_IDX(24), - DEFINE_VFP_V_IDX(25), - DEFINE_VFP_V_IDX(26), - DEFINE_VFP_V_IDX(27), - DEFINE_VFP_V_IDX(28), - DEFINE_VFP_V_IDX(29), - DEFINE_VFP_V_IDX(30), - DEFINE_VFP_V_IDX(31), - {e_regSetVFP, vfp_fpsr, "fpsr", NULL, Uint, Hex, 4, - VFP_V_OFFSET_IDX(32) + 0, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, - INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}, - {e_regSetVFP, vfp_fpcr, "fpcr", NULL, Uint, Hex, 4, - VFP_V_OFFSET_IDX(32) + 4, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, - INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}, - - DEFINE_PSEUDO_VFP_S_IDX(0), - DEFINE_PSEUDO_VFP_S_IDX(1), - DEFINE_PSEUDO_VFP_S_IDX(2), - DEFINE_PSEUDO_VFP_S_IDX(3), - DEFINE_PSEUDO_VFP_S_IDX(4), - DEFINE_PSEUDO_VFP_S_IDX(5), - DEFINE_PSEUDO_VFP_S_IDX(6), - DEFINE_PSEUDO_VFP_S_IDX(7), - DEFINE_PSEUDO_VFP_S_IDX(8), - DEFINE_PSEUDO_VFP_S_IDX(9), - DEFINE_PSEUDO_VFP_S_IDX(10), - DEFINE_PSEUDO_VFP_S_IDX(11), - DEFINE_PSEUDO_VFP_S_IDX(12), - DEFINE_PSEUDO_VFP_S_IDX(13), - DEFINE_PSEUDO_VFP_S_IDX(14), - DEFINE_PSEUDO_VFP_S_IDX(15), - DEFINE_PSEUDO_VFP_S_IDX(16), - DEFINE_PSEUDO_VFP_S_IDX(17), - DEFINE_PSEUDO_VFP_S_IDX(18), - DEFINE_PSEUDO_VFP_S_IDX(19), - DEFINE_PSEUDO_VFP_S_IDX(20), - DEFINE_PSEUDO_VFP_S_IDX(21), - DEFINE_PSEUDO_VFP_S_IDX(22), - DEFINE_PSEUDO_VFP_S_IDX(23), - DEFINE_PSEUDO_VFP_S_IDX(24), - DEFINE_PSEUDO_VFP_S_IDX(25), - DEFINE_PSEUDO_VFP_S_IDX(26), - DEFINE_PSEUDO_VFP_S_IDX(27), - DEFINE_PSEUDO_VFP_S_IDX(28), - DEFINE_PSEUDO_VFP_S_IDX(29), - DEFINE_PSEUDO_VFP_S_IDX(30), - DEFINE_PSEUDO_VFP_S_IDX(31), - - DEFINE_PSEUDO_VFP_D_IDX(0), - DEFINE_PSEUDO_VFP_D_IDX(1), - DEFINE_PSEUDO_VFP_D_IDX(2), - DEFINE_PSEUDO_VFP_D_IDX(3), - DEFINE_PSEUDO_VFP_D_IDX(4), - 
DEFINE_PSEUDO_VFP_D_IDX(5), - DEFINE_PSEUDO_VFP_D_IDX(6), - DEFINE_PSEUDO_VFP_D_IDX(7), - DEFINE_PSEUDO_VFP_D_IDX(8), - DEFINE_PSEUDO_VFP_D_IDX(9), - DEFINE_PSEUDO_VFP_D_IDX(10), - DEFINE_PSEUDO_VFP_D_IDX(11), - DEFINE_PSEUDO_VFP_D_IDX(12), - DEFINE_PSEUDO_VFP_D_IDX(13), - DEFINE_PSEUDO_VFP_D_IDX(14), - DEFINE_PSEUDO_VFP_D_IDX(15), - DEFINE_PSEUDO_VFP_D_IDX(16), - DEFINE_PSEUDO_VFP_D_IDX(17), - DEFINE_PSEUDO_VFP_D_IDX(18), - DEFINE_PSEUDO_VFP_D_IDX(19), - DEFINE_PSEUDO_VFP_D_IDX(20), - DEFINE_PSEUDO_VFP_D_IDX(21), - DEFINE_PSEUDO_VFP_D_IDX(22), - DEFINE_PSEUDO_VFP_D_IDX(23), - DEFINE_PSEUDO_VFP_D_IDX(24), - DEFINE_PSEUDO_VFP_D_IDX(25), - DEFINE_PSEUDO_VFP_D_IDX(26), - DEFINE_PSEUDO_VFP_D_IDX(27), - DEFINE_PSEUDO_VFP_D_IDX(28), - DEFINE_PSEUDO_VFP_D_IDX(29), - DEFINE_PSEUDO_VFP_D_IDX(30), - DEFINE_PSEUDO_VFP_D_IDX(31) - -}; +#define SVE_OFFSET_Z_IDX(idx) \ + (offsetof(DNBArchMachARM64::SVE, z[idx]) + \ + offsetof(DNBArchMachARM64::Context, sve)) +#define SVE_OFFSET_P_IDX(idx) \ + (offsetof(DNBArchMachARM64::SVE, p[idx]) + \ + offsetof(DNBArchMachARM64::Context, sve)) +#define SME_OFFSET(reg) \ + (offsetof(DNBArchMachARM64::SME, reg) + \ + offsetof(DNBArchMachARM64::Context, sme)) //_STRUCT_ARM_EXCEPTION_STATE64 //{ @@ -2106,29 +2350,220 @@ const DNBRegisterInfo DNBArchMachARM64::g_exc_registers[] = { // Number of registers in each register set const size_t DNBArchMachARM64::k_num_gpr_registers = sizeof(g_gpr_registers) / sizeof(DNBRegisterInfo); -const size_t DNBArchMachARM64::k_num_vfp_registers = - sizeof(g_vfp_registers) / sizeof(DNBRegisterInfo); const size_t DNBArchMachARM64::k_num_exc_registers = sizeof(g_exc_registers) / sizeof(DNBRegisterInfo); -const size_t DNBArchMachARM64::k_num_all_registers = - k_num_gpr_registers + k_num_vfp_registers + k_num_exc_registers; - -// Register set definitions. The first definitions at register set index -// of zero is for all registers, followed by other registers sets. 
The -// register information for the all register set need not be filled in. -const DNBRegisterSetInfo DNBArchMachARM64::g_reg_sets[] = { - {"ARM64 Registers", NULL, k_num_all_registers}, - {"General Purpose Registers", g_gpr_registers, k_num_gpr_registers}, - {"Floating Point Registers", g_vfp_registers, k_num_vfp_registers}, - {"Exception State Registers", g_exc_registers, k_num_exc_registers}}; -// Total number of register sets for this architecture -const size_t DNBArchMachARM64::k_num_register_sets = - sizeof(g_reg_sets) / sizeof(DNBRegisterSetInfo); +static std::vector g_sve_registers; +static void initialize_sve_registers() { + static const char *g_z_regnames[32] = { + "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", + "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", + "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", + "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31"}; + static const char *g_p_regnames[16] = { + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"}; + + if (DNBArchMachARM64::CPUHasSME()) { + uint32_t svl_bytes = DNBArchMachARM64::GetSMEMaxSVL(); + for (uint32_t i = 0; i < 32; i++) { + g_sve_registers.push_back( + {DNBArchMachARM64::e_regSetSVE, (uint32_t)sve_z0 + i, g_z_regnames[i], + NULL, Vector, VectorOfUInt8, svl_bytes, + static_cast(SVE_OFFSET_Z_IDX(i)), INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + (uint32_t)debugserver_sve_z0 + i, NULL, g_invalidate_z[i]}); + } + for (uint32_t i = 0; i < 16; i++) { + g_sve_registers.push_back( + {DNBArchMachARM64::e_regSetSVE, (uint32_t)sve_p0 + i, g_p_regnames[i], + NULL, Vector, VectorOfUInt8, svl_bytes / 8, + (uint32_t)SVE_OFFSET_P_IDX(i), INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + (uint32_t)debugserver_sve_p0 + i, NULL, NULL}); + } + } +} + +static std::vector g_vfp_registers; +static void initialize_vfp_registers() { + static const char *g_v_regnames[32] = { + "v0", "v1", "v2", "v3", "v4", 
"v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"}; + static const char *g_q_regnames[32] = { + "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", + "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23", + "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31"}; + + static const char *g_d_regnames[32] = { + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"}; + + static const char *g_s_regnames[32] = { + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", + "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", + "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"}; + + for (uint32_t i = 0; i < 32; i++) + if (DNBArchMachARM64::CPUHasSME()) + g_vfp_registers.push_back( + {DNBArchMachARM64::e_regSetVFP, (uint32_t)vfp_v0 + i, g_v_regnames[i], + g_q_regnames[i], Vector, VectorOfUInt8, 16, + static_cast(VFP_V_OFFSET_IDX(i)), INVALID_NUB_REGNUM, + (uint32_t)dwarf_v0 + i, INVALID_NUB_REGNUM, + (uint32_t)debugserver_vfp_v0 + i, NULL, g_invalidate_z[i]}); + else + g_vfp_registers.push_back( + {DNBArchMachARM64::e_regSetVFP, (uint32_t)vfp_v0 + i, g_v_regnames[i], + g_q_regnames[i], Vector, VectorOfUInt8, 16, + static_cast(VFP_V_OFFSET_IDX(i)), INVALID_NUB_REGNUM, + (uint32_t)dwarf_v0 + i, INVALID_NUB_REGNUM, + (uint32_t)debugserver_vfp_v0 + i, NULL, g_invalidate_v[i]}); + + g_vfp_registers.push_back( + {DNBArchMachARM64::e_regSetVFP, vfp_fpsr, "fpsr", NULL, Uint, Hex, 4, + VFP_V_OFFSET_IDX(32) + 0, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}); + g_vfp_registers.push_back( + {DNBArchMachARM64::e_regSetVFP, vfp_fpcr, 
"fpcr", NULL, Uint, Hex, 4, + VFP_V_OFFSET_IDX(32) + 4, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}); + + for (uint32_t i = 0; i < 32; i++) + if (DNBArchMachARM64::CPUHasSME()) + g_vfp_registers.push_back( + {DNBArchMachARM64::e_regSetVFP, (uint32_t)vfp_d0 + i, g_d_regnames[i], + NULL, IEEE754, Float, 8, 0, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, g_invalidate_z[i]}); + else + g_vfp_registers.push_back( + {DNBArchMachARM64::e_regSetVFP, (uint32_t)vfp_d0 + i, g_d_regnames[i], + NULL, IEEE754, Float, 8, 0, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, g_invalidate_v[i]}); + + for (uint32_t i = 0; i < 32; i++) + if (DNBArchMachARM64::CPUHasSME()) + g_vfp_registers.push_back( + {DNBArchMachARM64::e_regSetVFP, (uint32_t)vfp_s0 + i, g_s_regnames[i], + NULL, IEEE754, Float, 4, 0, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, g_invalidate_z[i]}); + else + g_vfp_registers.push_back( + {DNBArchMachARM64::e_regSetVFP, (uint32_t)vfp_s0 + i, g_s_regnames[i], + NULL, IEEE754, Float, 4, 0, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, g_invalidate_v[i]}); +} + +static std::once_flag g_vfp_once; +DNBRegisterInfo * +DNBArchMachARM64::get_vfp_registerinfo(size_t &num_vfp_registers) { + std::call_once(g_vfp_once, []() { initialize_vfp_registers(); }); + num_vfp_registers = g_vfp_registers.size(); + if (num_vfp_registers > 0) + return g_vfp_registers.data(); + else + return nullptr; +} + +static std::once_flag g_sve_once; +DNBRegisterInfo * +DNBArchMachARM64::get_sve_registerinfo(size_t &num_sve_registers) { + std::call_once(g_sve_once, []() { initialize_sve_registers(); }); + num_sve_registers = g_sve_registers.size(); + if (num_sve_registers > 0) + return g_sve_registers.data(); + else + return nullptr; +} + +static std::vector g_sme_registers; +static void 
initialize_sme_registers() { + if (DNBArchMachARM64::CPUHasSME()) { + uint32_t svl_bytes = DNBArchMachARM64::GetSMEMaxSVL(); + g_sme_registers.push_back( + {DNBArchMachARM64::e_regSetSME, sme_svcr, "svcr", NULL, Uint, Hex, 8, + SME_OFFSET(svcr), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}); + g_sme_registers.push_back( + {DNBArchMachARM64::e_regSetSME, sme_tpidr2, "tpidr2", NULL, Uint, Hex, + 8, SME_OFFSET(tpidr2), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}); + g_sme_registers.push_back( + {DNBArchMachARM64::e_regSetSME, sme_svl_b, "svl", NULL, Uint, Hex, 2, + SME_OFFSET(svl_b), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}); + uint32_t za_max_size = svl_bytes * svl_bytes; + g_sme_registers.push_back({DNBArchMachARM64::e_regSetSME, sme_za, "za", + NULL, Vector, VectorOfUInt8, za_max_size, + SME_OFFSET(za), INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, NULL, NULL}); + } + if (DNBArchMachARM64::CPUHasSME2()) { + g_sme_registers.push_back({DNBArchMachARM64::e_regSetSME, sme_zt0, "zt0", + NULL, Vector, VectorOfUInt8, 64, SME_OFFSET(zt0), + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, + NULL}); + } +} + +static std::once_flag g_sme_once; +DNBRegisterInfo * +DNBArchMachARM64::get_sme_registerinfo(size_t &num_sme_registers) { + std::call_once(g_sme_once, []() { initialize_sme_registers(); }); + num_sme_registers = g_sme_registers.size(); + if (num_sme_registers > 0) + return g_sme_registers.data(); + else + return nullptr; +} + +static std::vector g_reg_sets; +void DNBArchMachARM64::initialize_reg_sets() { + nub_size_t num_all_registers = DNBArchMachARM64::k_num_gpr_registers + + DNBArchMachARM64::k_num_exc_registers; + size_t num_vfp_registers = 0; + DNBRegisterInfo *vfp_reginfos = + DNBArchMachARM64::get_vfp_registerinfo(num_vfp_registers); + 
size_t num_sve_registers = 0; + DNBRegisterInfo *sve_reginfos = + DNBArchMachARM64::get_sve_registerinfo(num_sve_registers); + size_t num_sme_registers = 0; + DNBRegisterInfo *sme_reginfos = + DNBArchMachARM64::get_sme_registerinfo(num_sme_registers); + num_all_registers += + num_vfp_registers + num_sve_registers + num_sme_registers; + g_reg_sets.push_back({"ARM64 Registers", NULL, num_all_registers}); + g_reg_sets.push_back({"General Purpose Registers", + DNBArchMachARM64::g_gpr_registers, + DNBArchMachARM64::k_num_gpr_registers}); + g_reg_sets.push_back( + {"Floating Point Registers", vfp_reginfos, num_vfp_registers}); + g_reg_sets.push_back({"Exception State Registers", + DNBArchMachARM64::g_exc_registers, + DNBArchMachARM64::k_num_exc_registers}); + if (DNBArchMachARM64::CPUHasSME()) { + g_reg_sets.push_back({"Scalable Vector Extension Registers", sve_reginfos, + num_sve_registers}); + g_reg_sets.push_back({"Scalable Matrix Extension Registers", sme_reginfos, + num_sme_registers}); + } +} + +static std::once_flag g_initialize_register_set_info; const DNBRegisterSetInfo * DNBArchMachARM64::GetRegisterSetInfo(nub_size_t *num_reg_sets) { - *num_reg_sets = k_num_register_sets; - return g_reg_sets; + std::call_once(g_initialize_register_set_info, + []() { initialize_reg_sets(); }); + *num_reg_sets = g_reg_sets.size(); + return g_reg_sets.data(); } bool DNBArchMachARM64::FixGenericRegisterNumber(uint32_t &set, uint32_t ®) { @@ -2185,6 +2620,7 @@ bool DNBArchMachARM64::GetRegisterValue(uint32_t set, uint32_t reg, const DNBRegisterInfo *regInfo = m_thread->GetRegisterInfo(set, reg); if (regInfo) { + uint16_t max_svl_bytes = GetSMEMaxSVL(); value->info = *regInfo; switch (set) { case e_regSetGPR: @@ -2281,6 +2717,46 @@ bool DNBArchMachARM64::GetRegisterValue(uint32_t set, uint32_t reg, } break; + case e_regSetSVE: + if (GetRegisterState(e_regSetSVE, false) != KERN_SUCCESS) + return false; + + if (reg >= sve_z0 && reg <= sve_z31) { + memset(&value->value.v_uint8, 0, 
max_svl_bytes); + memcpy(&value->value.v_uint8, &m_state.context.sve.z[reg - sve_z0], + max_svl_bytes); + return true; + } else if (reg >= sve_p0 && reg <= sve_p15) { + memset(&value->value.v_uint8, 0, max_svl_bytes / 8); + memcpy(&value->value.v_uint8, &m_state.context.sve.p[reg - sve_p0], + max_svl_bytes / 8); + return true; + } + break; + + case e_regSetSME: + if (GetRegisterState(e_regSetSME, false) != KERN_SUCCESS) + return false; + + if (reg == sme_svcr) { + value->value.uint64 = m_state.context.sme.svcr; + return true; + } else if (reg == sme_tpidr2) { + value->value.uint64 = m_state.context.sme.tpidr2; + return true; + } else if (reg == sme_svl_b) { + value->value.uint64 = m_state.context.sme.svl_b; + return true; + } else if (reg == sme_za) { + memcpy(&value->value.v_uint8, m_state.context.sme.za.data(), + max_svl_bytes * max_svl_bytes); + return true; + } else if (reg == sme_zt0) { + memcpy(&value->value.v_uint8, &m_state.context.sme.zt0, 64); + return true; + } + break; + case e_regSetEXC: if (reg == exc_far) { value->value.uint64 = m_state.context.exc.__far; @@ -2387,6 +2863,37 @@ bool DNBArchMachARM64::SetRegisterValue(uint32_t set, uint32_t reg, } break; + case e_regSetSVE: + if (reg >= sve_z0 && reg <= sve_z31) { + uint16_t max_svl_bytes = GetSMEMaxSVL(); + memcpy(&m_state.context.sve.z[reg - sve_z0], &value->value.v_uint8, + max_svl_bytes); + success = true; + } + if (reg >= sve_p0 && reg <= sve_p15) { + uint16_t max_svl_bytes = GetSMEMaxSVL(); + memcpy(&m_state.context.sve.p[reg - sve_p0], &value->value.v_uint8, + max_svl_bytes / 8); + success = true; + } + break; + + case e_regSetSME: + // Cannot change ARM_SME_STATE registers with thread_set_state + if (reg == sme_svcr || reg == sme_tpidr2 || reg == sme_svl_b) + return false; + if (reg == sme_za) { + uint16_t max_svl_bytes = GetSMEMaxSVL(); + memcpy(m_state.context.sme.za.data(), &value->value.v_uint8, + max_svl_bytes * max_svl_bytes); + success = true; + } + if (reg == sme_zt0) { + 
memcpy(&m_state.context.sme.zt0, &value->value.v_uint8, 64); + success = true; + } + break; + case e_regSetEXC: if (reg == exc_far) { m_state.context.exc.__far = value->value.uint64; @@ -2408,13 +2915,26 @@ bool DNBArchMachARM64::SetRegisterValue(uint32_t set, uint32_t reg, kern_return_t DNBArchMachARM64::GetRegisterState(int set, bool force) { switch (set) { - case e_regSetALL: - return GetGPRState(force) | GetVFPState(force) | GetEXCState(force) | - GetDBGState(force); + case e_regSetALL: { + kern_return_t retval = GetGPRState(force) | GetVFPState(force) | + GetEXCState(force) | GetDBGState(force); + // If the processor is not in Streaming SVE Mode currently, these + // two will fail to read. Don't return that as an error, it will + // be the most common case. + if (CPUHasSME()) { + GetSVEState(force); + GetSMEState(force); + } + return retval; + } case e_regSetGPR: return GetGPRState(force); case e_regSetVFP: return GetVFPState(force); + case e_regSetSVE: + return GetSVEState(force); + case e_regSetSME: + return GetSMEState(force); case e_regSetEXC: return GetEXCState(force); case e_regSetDBG: @@ -2438,6 +2958,10 @@ kern_return_t DNBArchMachARM64::SetRegisterState(int set) { return SetGPRState(); case e_regSetVFP: return SetVFPState(); + case e_regSetSVE: + return SetSVEState(); + case e_regSetSME: + return SetSMEState(); case e_regSetEXC: return SetEXCState(); case e_regSetDBG: @@ -2455,6 +2979,15 @@ bool DNBArchMachARM64::RegisterSetStateIsValid(int set) const { nub_size_t DNBArchMachARM64::GetRegisterContext(void *buf, nub_size_t buf_len) { nub_size_t size = sizeof(m_state.context.gpr) + sizeof(m_state.context.vfp) + sizeof(m_state.context.exc); + const bool cpu_has_sme = CPUHasSME(); + if (cpu_has_sme) { + size += sizeof(m_state.context.sve); + // ZA register is in a std::vector so we need to add + // the sizes of the SME manually. 
+ size += ARM_SME_STATE_COUNT * sizeof(uint32_t); + size += m_state.context.sme.za.size(); + size += ARM_SME2_STATE_COUNT * sizeof(uint32_t); + } if (buf && buf_len) { if (size > buf_len) @@ -2463,6 +2996,13 @@ nub_size_t DNBArchMachARM64::GetRegisterContext(void *buf, nub_size_t buf_len) { bool force = false; if (GetGPRState(force) | GetVFPState(force) | GetEXCState(force)) return 0; + // Don't error out if SME/SVE fail to read. These can only be read + // when the process is in Streaming SVE Mode, so the failure to read + // them will be common. + if (cpu_has_sme) { + GetSVEState(force); + GetSMEState(force); + } // Copy each struct individually to avoid any padding that might be between // the structs in m_state.context @@ -2471,6 +3011,21 @@ nub_size_t DNBArchMachARM64::GetRegisterContext(void *buf, nub_size_t buf_len) { p += sizeof(m_state.context.gpr); ::memcpy(p, &m_state.context.vfp, sizeof(m_state.context.vfp)); p += sizeof(m_state.context.vfp); + if (cpu_has_sme) { + ::memcpy(p, &m_state.context.sve, sizeof(m_state.context.sve)); + p += sizeof(m_state.context.sve); + + memcpy(p, &m_state.context.sme.svcr, + ARM_SME_STATE_COUNT * sizeof(uint32_t)); + p += ARM_SME_STATE_COUNT * sizeof(uint32_t); + memcpy(p, m_state.context.sme.za.data(), m_state.context.sme.za.size()); + p += m_state.context.sme.za.size(); + if (CPUHasSME2()) { + memcpy(p, &m_state.context.sme.zt0, + ARM_SME2_STATE_COUNT * sizeof(uint32_t)); + p += ARM_SME2_STATE_COUNT * sizeof(uint32_t); + } + } ::memcpy(p, &m_state.context.exc, sizeof(m_state.context.exc)); p += sizeof(m_state.context.exc); @@ -2490,6 +3045,15 @@ nub_size_t DNBArchMachARM64::SetRegisterContext(const void *buf, nub_size_t buf_len) { nub_size_t size = sizeof(m_state.context.gpr) + sizeof(m_state.context.vfp) + sizeof(m_state.context.exc); + if (CPUHasSME()) { + // m_state.context.za is three status registers, then a std::vector + // for ZA, then zt0, so the size of the data is not statically knowable. 
+ nub_size_t sme_size = ARM_SME_STATE_COUNT * sizeof(uint32_t); + sme_size += m_state.context.sme.za.size(); + sme_size += ARM_SME2_STATE_COUNT * sizeof(uint32_t); + + size += sizeof(m_state.context.sve) + sme_size; + } if (buf == NULL || buf_len == 0) size = 0; @@ -2505,6 +3069,20 @@ nub_size_t DNBArchMachARM64::SetRegisterContext(const void *buf, p += sizeof(m_state.context.gpr); ::memcpy(&m_state.context.vfp, p, sizeof(m_state.context.vfp)); p += sizeof(m_state.context.vfp); + if (CPUHasSME()) { + memcpy(&m_state.context.sve, p, sizeof(m_state.context.sve)); + p += sizeof(m_state.context.sve); + memcpy(&m_state.context.sme.svcr, p, + ARM_SME_STATE_COUNT * sizeof(uint32_t)); + p += ARM_SME_STATE_COUNT * sizeof(uint32_t); + memcpy(m_state.context.sme.za.data(), p, m_state.context.sme.za.size()); + p += m_state.context.sme.za.size(); + if (CPUHasSME2()) { + memcpy(&m_state.context.sme.zt0, p, + ARM_SME2_STATE_COUNT * sizeof(uint32_t)); + p += ARM_SME2_STATE_COUNT * sizeof(uint32_t); + } + } ::memcpy(&m_state.context.exc, p, sizeof(m_state.context.exc)); p += sizeof(m_state.context.exc); @@ -2513,6 +3091,10 @@ nub_size_t DNBArchMachARM64::SetRegisterContext(const void *buf, assert(bytes_written == size); SetGPRState(); SetVFPState(); + if (CPUHasSME()) { + SetSVEState(); + SetSMEState(); + } SetEXCState(); } DNBLogThreadedIf( diff --git a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.h b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.h index 0ea33d8e1c4c5..11ad1f40c3ef6 100644 --- a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.h +++ b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.h @@ -15,6 +15,10 @@ #include #include +#if !defined(ARM_SME_STATE) +#include "sme_thread_status.h" +#endif + #if defined(ARM_THREAD_STATE64_COUNT) #include "DNBArch.h" @@ -93,7 +97,6 @@ class DNBArchMachARM64 : public DNBArchProtocol { bool DisableHardwareWatchpoint_helper(uint32_t hw_break_index, bool also_set_on_task); -protected: 
kern_return_t EnableHardwareSingleStep(bool enable); static bool FixGenericRegisterNumber(uint32_t &set, uint32_t ®); @@ -102,6 +105,8 @@ class DNBArchMachARM64 : public DNBArchProtocol { e_regSetGPR, // ARM_THREAD_STATE64, e_regSetVFP, // ARM_NEON_STATE64, e_regSetEXC, // ARM_EXCEPTION_STATE64, + e_regSetSVE, // ARM_SVE_Z_STATE1, ARM_SVE_Z_STATE2, ARM_SVE_P_STATE + e_regSetSME, // ARM_SME_STATE, ARM_SME_ZA_STATE1..16, ARM_SME2_STATE e_regSetDBG, // ARM_DEBUG_STATE64, kNumRegisterSets }; @@ -119,20 +124,39 @@ class DNBArchMachARM64 : public DNBArchProtocol { typedef arm_neon_state64_t FPU; typedef arm_exception_state64_t EXC; + struct SVE { + uint8_t z[32][256]; // arm_sve_z_state_t z[2] + uint8_t p[16][32]; // arm_sve_p_state_t p + }; + + struct SME { + uint64_t svcr; // arm_sme_state_t + uint64_t tpidr2; // arm_sme_state_t + uint16_t svl_b; // arm_sme_state_t + + std::vector za; + uint8_t zt0[64]; + + SME() { + if (DNBArchMachARM64::CPUHasSME()) { + int svl = GetSMEMaxSVL(); + za.resize(svl * svl, 0); + } + } + }; + static const DNBRegisterInfo g_gpr_registers[]; - static const DNBRegisterInfo g_vfp_registers[]; static const DNBRegisterInfo g_exc_registers[]; - static const DNBRegisterSetInfo g_reg_sets[]; static const size_t k_num_gpr_registers; - static const size_t k_num_vfp_registers; static const size_t k_num_exc_registers; static const size_t k_num_all_registers; - static const size_t k_num_register_sets; struct Context { GPR gpr; FPU vfp; + SVE sve; + SME sme; EXC exc; }; @@ -141,6 +165,8 @@ class DNBArchMachARM64 : public DNBArchProtocol { arm_debug_state64_t dbg; kern_return_t gpr_errs[2]; // Read/Write errors kern_return_t vfp_errs[2]; // Read/Write errors + kern_return_t sve_errs[2]; // Read/Write errors + kern_return_t sme_errs[2]; // Read/Write errors kern_return_t exc_errs[2]; // Read/Write errors kern_return_t dbg_errs[2]; // Read/Write errors State() { @@ -148,6 +174,8 @@ class DNBArchMachARM64 : public DNBArchProtocol { for (i = 0; i < 
kNumErrors; i++) { gpr_errs[i] = -1; vfp_errs[i] = -1; + sve_errs[i] = -1; + sme_errs[i] = -1; exc_errs[i] = -1; dbg_errs[i] = -1; } @@ -163,11 +191,15 @@ class DNBArchMachARM64 : public DNBArchProtocol { // we got any kind of error. case e_regSetALL: return gpr_errs[err_idx] | vfp_errs[err_idx] | exc_errs[err_idx] | - dbg_errs[err_idx]; + sve_errs[err_idx] | sme_errs[err_idx] | dbg_errs[err_idx]; case e_regSetGPR: return gpr_errs[err_idx]; case e_regSetVFP: return vfp_errs[err_idx]; + case e_regSetSVE: + return sve_errs[err_idx]; + case e_regSetSME: + return sme_errs[err_idx]; case e_regSetEXC: return exc_errs[err_idx]; // case e_regSetDBG: return dbg_errs[err_idx]; @@ -183,6 +215,8 @@ class DNBArchMachARM64 : public DNBArchProtocol { case e_regSetALL: gpr_errs[err_idx] = err; vfp_errs[err_idx] = err; + sve_errs[err_idx] = err; + sme_errs[err_idx] = err; dbg_errs[err_idx] = err; exc_errs[err_idx] = err; return true; @@ -195,6 +229,14 @@ class DNBArchMachARM64 : public DNBArchProtocol { vfp_errs[err_idx] = err; return true; + case e_regSetSVE: + sve_errs[err_idx] = err; + return true; + + case e_regSetSME: + sme_errs[err_idx] = err; + return true; + case e_regSetEXC: exc_errs[err_idx] = err; return true; @@ -215,11 +257,15 @@ class DNBArchMachARM64 : public DNBArchProtocol { kern_return_t GetGPRState(bool force); kern_return_t GetVFPState(bool force); + kern_return_t GetSVEState(bool force); + kern_return_t GetSMEState(bool force); kern_return_t GetEXCState(bool force); kern_return_t GetDBGState(bool force); kern_return_t SetGPRState(); kern_return_t SetVFPState(); + kern_return_t SetSVEState(); + kern_return_t SetSMEState(); kern_return_t SetEXCState(); kern_return_t SetDBGState(bool also_set_on_task); @@ -246,7 +292,16 @@ class DNBArchMachARM64 : public DNBArchProtocol { uint32_t control; }; -protected: + static bool CPUHasSME(); + static bool CPUHasSME2(); + static unsigned int GetSMEMaxSVL(); + +private: + static DNBRegisterInfo *get_vfp_registerinfo(size_t 
&num_vfp_registers); + static DNBRegisterInfo *get_sve_registerinfo(size_t &num_sve_registers); + static DNBRegisterInfo *get_sme_registerinfo(size_t &num_sme_registers); + static void initialize_reg_sets(); + MachThread *m_thread; State m_state; arm_debug_state64_t m_dbg_save; @@ -264,6 +319,9 @@ class DNBArchMachARM64 : public DNBArchProtocol { typedef std::map SaveRegisterStates; SaveRegisterStates m_saved_register_states; + + DNBArchMachARM64(const DNBArchMachARM64 &) = delete; + DNBArchMachARM64 &operator=(const DNBArchMachARM64 &) = delete; }; #endif // #if defined (ARM_THREAD_STATE64_COUNT) diff --git a/lldb/tools/debugserver/source/MacOSX/arm64/sme_thread_status.h b/lldb/tools/debugserver/source/MacOSX/arm64/sme_thread_status.h new file mode 100644 index 0000000000000..f33b3202ccab5 --- /dev/null +++ b/lldb/tools/debugserver/source/MacOSX/arm64/sme_thread_status.h @@ -0,0 +1,77 @@ +#include +#include + +// Define the SVE/SME/SME2 thread status structures +// flavors, and sizes so this can build against an +// older SDK which does not have these definitions +// yet. 
+
+#if !defined(ARM_SME_STATE) // SDK headers lack these; supply local fallbacks
+
+#define _STRUCT_ARM_SME_STATE struct arm_sme_state
+_STRUCT_ARM_SME_STATE {
+  uint64_t svcr;
+  uint64_t tpidr2_el0;
+  uint16_t svl_b;
+};
+
+#define _STRUCT_ARM_SVE_Z_STATE struct arm_sve_z_state
+_STRUCT_ARM_SVE_Z_STATE { char z[16][256]; }
+__attribute__((aligned(alignof(unsigned int))));
+
+#define _STRUCT_ARM_SVE_P_STATE struct arm_sve_p_state
+_STRUCT_ARM_SVE_P_STATE { char p[16][256 / 8]; }
+__attribute__((aligned(alignof(unsigned int))));
+
+#define _STRUCT_ARM_SME_ZA_STATE struct arm_sme_za_state
+_STRUCT_ARM_SME_ZA_STATE { char za[4096]; }
+__attribute__((aligned(alignof(unsigned int))));
+
+#define _STRUCT_ARM_SME2_STATE struct arm_sme2_state
+_STRUCT_ARM_SME2_STATE { char zt0[64]; }
+__attribute__((aligned(alignof(unsigned int))));
+
+#define ARM_SME_STATE 28 // flavor numbers below are consecutive: 28..48
+#define ARM_SVE_Z_STATE1 29
+#define ARM_SVE_Z_STATE2 30
+#define ARM_SVE_P_STATE 31
+#define ARM_SME_ZA_STATE1 32
+#define ARM_SME_ZA_STATE2 33
+#define ARM_SME_ZA_STATE3 34
+#define ARM_SME_ZA_STATE4 35
+#define ARM_SME_ZA_STATE5 36
+#define ARM_SME_ZA_STATE6 37
+#define ARM_SME_ZA_STATE7 38
+#define ARM_SME_ZA_STATE8 39
+#define ARM_SME_ZA_STATE9 40
+#define ARM_SME_ZA_STATE10 41
+#define ARM_SME_ZA_STATE11 42
+#define ARM_SME_ZA_STATE12 43 // must be distinct from ARM_SME_ZA_STATE11 (42)
+#define ARM_SME_ZA_STATE13 44
+#define ARM_SME_ZA_STATE14 45
+#define ARM_SME_ZA_STATE15 46
+#define ARM_SME_ZA_STATE16 47
+#define ARM_SME2_STATE 48
+
+typedef _STRUCT_ARM_SME_STATE arm_sme_state_t;
+typedef _STRUCT_ARM_SVE_Z_STATE arm_sve_z_state_t;
+typedef _STRUCT_ARM_SVE_P_STATE arm_sve_p_state_t;
+typedef _STRUCT_ARM_SME_ZA_STATE arm_sme_za_state_t;
+typedef _STRUCT_ARM_SME2_STATE arm_sme2_state_t;
+
+#define ARM_SME_STATE_COUNT                                                    \
+  ((mach_msg_type_number_t)(sizeof(arm_sme_state_t) / sizeof(uint32_t)))
+
+#define ARM_SVE_Z_STATE_COUNT                                                  \
+  ((mach_msg_type_number_t)(sizeof(arm_sve_z_state_t) / sizeof(uint32_t)))
+
+#define ARM_SVE_P_STATE_COUNT                                                  \
+  ((mach_msg_type_number_t)(sizeof(arm_sve_p_state_t) / sizeof(uint32_t)))
+ +#define ARM_SME_ZA_STATE_COUNT \ + ((mach_msg_type_number_t)(sizeof(arm_sme_za_state_t) / sizeof(uint32_t))) + +#define ARM_SME2_STATE_COUNT \ + ((mach_msg_type_number_t)(sizeof(arm_sme2_state_t) / sizeof(uint32_t))) + +#endif // !defined(ARM_SME_STATE) diff --git a/lldb/tools/debugserver/source/RNBRemote.cpp b/lldb/tools/debugserver/source/RNBRemote.cpp index 07211c6e9db49..efa015920c0d5 100644 --- a/lldb/tools/debugserver/source/RNBRemote.cpp +++ b/lldb/tools/debugserver/source/RNBRemote.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -2567,42 +2568,50 @@ rnb_err_t RNBRemote::HandlePacket_QSetProcessEvent(const char *p) { return SendPacket("OK"); } -void register_value_in_hex_fixed_width(std::ostream &ostrm, nub_process_t pid, +// If a fail_value is provided, a correct-length reply is always provided, +// even if the register cannot be read right now on this thread. +bool register_value_in_hex_fixed_width(std::ostream &ostrm, nub_process_t pid, nub_thread_t tid, const register_map_entry_t *reg, - const DNBRegisterValue *reg_value_ptr) { + const DNBRegisterValue *reg_value_ptr, + std::optional fail_value) { if (reg != NULL) { - DNBRegisterValue reg_value; + std::unique_ptr reg_value = + std::make_unique(); if (reg_value_ptr == NULL) { if (DNBThreadGetRegisterValueByID(pid, tid, reg->nub_info.set, - reg->nub_info.reg, ®_value)) - reg_value_ptr = ®_value; + reg->nub_info.reg, reg_value.get())) + reg_value_ptr = reg_value.get(); } if (reg_value_ptr) { append_hex_value(ostrm, reg_value_ptr->value.v_uint8, reg->nub_info.size, false); - } else { - // If we fail to read a register value, check if it has a default - // fail value. If it does, return this instead in case some of - // the registers are not available on the current system. 
- if (reg->nub_info.size > 0) { - std::vector zeros(reg->nub_info.size, '\0'); - append_hex_value(ostrm, zeros.data(), zeros.size(), false); - } + return true; } + if (!fail_value || reg->nub_info.size == 0) + return false; + + // Pad out the reply to the correct size to maintain correct offsets, + // even if we could not read the register value. + std::vector fail_result(reg->nub_info.size, *fail_value); + append_hex_value(ostrm, fail_result.data(), fail_result.size(), false); + return true; } + return false; } void debugserver_regnum_with_fixed_width_hex_register_value( std::ostream &ostrm, nub_process_t pid, nub_thread_t tid, - const register_map_entry_t *reg, const DNBRegisterValue *reg_value_ptr) { + const register_map_entry_t *reg, const DNBRegisterValue *reg_value_ptr, + std::optional fail_value) { // Output the register number as 'NN:VVVVVVVV;' where NN is a 2 bytes HEX // gdb register number, and VVVVVVVV is the correct number of hex bytes // as ASCII for the register value. if (reg != NULL) { ostrm << RAWHEX8(reg->debugserver_regnum) << ':'; - register_value_in_hex_fixed_width(ostrm, pid, tid, reg, reg_value_ptr); + register_value_in_hex_fixed_width(ostrm, pid, tid, reg, reg_value_ptr, + fail_value); ostrm << ';'; } } @@ -2651,15 +2660,16 @@ typedef std::map StackMemoryMap; static void ReadStackMemory(nub_process_t pid, nub_thread_t tid, StackMemoryMap &stack_mmap, uint32_t backtrace_limit = 256) { - DNBRegisterValue reg_value; + std::unique_ptr reg_value = + std::make_unique(); if (DNBThreadGetRegisterValueByID(pid, tid, REGISTER_SET_GENERIC, - GENERIC_REGNUM_FP, ®_value)) { + GENERIC_REGNUM_FP, reg_value.get())) { uint32_t frame_count = 0; uint64_t fp = 0; - if (reg_value.info.size == 4) - fp = reg_value.value.uint32; + if (reg_value->info.size == 4) + fp = reg_value->value.uint32; else - fp = reg_value.value.uint64; + fp = reg_value->value.uint64; while (fp != 0) { // Make sure we never recurse more than 256 times so we don't recurse too // far or @@ 
-2667,7 +2677,7 @@ static void ReadStackMemory(nub_process_t pid, nub_thread_t tid, if (++frame_count > backtrace_limit) break; - const nub_size_t read_size = reg_value.info.size * 2; + const nub_size_t read_size = reg_value->info.size * 2; StackMemory stack_memory; stack_memory.length = read_size; if (DNBProcessMemoryRead(pid, fp, read_size, stack_memory.bytes) != @@ -2679,7 +2689,7 @@ static void ReadStackMemory(nub_process_t pid, nub_thread_t tid, // Put the entry into the cache stack_mmap[fp] = stack_memory; // Dereference the frame pointer to get to the previous frame pointer - if (reg_value.info.size == 4) + if (reg_value->info.size == 4) fp = ((uint32_t *)stack_memory.bytes)[0]; else fp = ((uint64_t *)stack_memory.bytes)[0]; @@ -2842,31 +2852,35 @@ rnb_err_t RNBRemote::SendStopReplyPacketForThread(nub_thread_t tid) { if (g_num_reg_entries == 0) InitializeRegisters(); - if (g_reg_entries != NULL) { - auto interesting_regset = [](int regset) -> bool { -#if defined(__arm64__) || defined(__aarch64__) - // GPRs and exception registers, helpful for debugging - // from packet logs. 
- return regset == 1 || regset == 3; -#else - return regset == 1; -#endif - }; - - DNBRegisterValue reg_value; - for (uint32_t reg = 0; reg < g_num_reg_entries; reg++) { - // Expedite all registers in the first register set that aren't - // contained in other registers - if (interesting_regset(g_reg_entries[reg].nub_info.set) && - g_reg_entries[reg].nub_info.value_regs == NULL) { - if (!DNBThreadGetRegisterValueByID( - pid, tid, g_reg_entries[reg].nub_info.set, - g_reg_entries[reg].nub_info.reg, ®_value)) - continue; - - debugserver_regnum_with_fixed_width_hex_register_value( - ostrm, pid, tid, &g_reg_entries[reg], ®_value); - } + nub_size_t num_reg_sets = 0; + const DNBRegisterSetInfo *reg_sets = DNBGetRegisterSetInfo(&num_reg_sets); + + std::unique_ptr reg_value = + std::make_unique(); + for (uint32_t reg = 0; reg < g_num_reg_entries; reg++) { + int regset = g_reg_entries[reg].nub_info.set; + bool include_reg = false; + // Expedite interesting register sets, all registers not + // contained in other registers + if (g_reg_entries[reg].nub_info.value_regs == nullptr && + (strcmp("General Purpose Registers", reg_sets[regset].name) == 0 || + strcmp("Exception State Registers", reg_sets[regset].name) == 0)) + include_reg = true; + // Include the SME state registers + if (strcmp("svcr", g_reg_entries[reg].nub_info.name) == 0 || + strcmp("tpidr2", g_reg_entries[reg].nub_info.name) == 0 || + strcmp("svl", g_reg_entries[reg].nub_info.name) == 0) + include_reg = true; + + if (include_reg) { + if (!DNBThreadGetRegisterValueByID(pid, tid, regset, + g_reg_entries[reg].nub_info.reg, + reg_value.get())) + continue; + + debugserver_regnum_with_fixed_width_hex_register_value( + ostrm, pid, tid, &g_reg_entries[reg], reg_value.get(), + std::nullopt); } } @@ -3326,14 +3340,19 @@ rnb_err_t RNBRemote::HandlePacket_G(const char *p) { if (g_num_reg_entries == 0) InitializeRegisters(); - StdStringExtractor packet(p); - packet.SetFilePos(1); // Skip the 'G' + p += 1; // Skip the 'G' 
nub_process_t pid = m_ctx.ProcessID(); nub_thread_t tid = ExtractThreadIDFromThreadSuffix(p); if (tid == INVALID_NUB_THREAD) return HandlePacket_ILLFORMED(__FILE__, __LINE__, p, "No thread specified in p packet"); + // Skip the thread specification in `G;thread:3488ea;[..data...]` + const char *last_semi = strrchr(p, ';'); + if (last_semi) + p = last_semi + 1; + + StdStringExtractor packet(p); // Get the register context size first by calling with NULL buffer nub_size_t reg_ctx_size = DNBThreadGetRegisterContext(pid, tid, NULL, 0); @@ -4212,7 +4231,9 @@ rnb_err_t RNBRemote::HandlePacket_p(const char *p) { append_hex_value(ostrm, zeros.data(), zeros.size(), false); } } else { - register_value_in_hex_fixed_width(ostrm, pid, tid, reg_entry, NULL); + if (!register_value_in_hex_fixed_width(ostrm, pid, tid, reg_entry, NULL, + std::nullopt)) + return SendErrorPacket("E97"); } return SendPacket(ostrm.str()); } @@ -4266,9 +4287,10 @@ rnb_err_t RNBRemote::HandlePacket_P(const char *p) { return SendErrorPacket("E48"); } - DNBRegisterValue reg_value; - reg_value.info = reg_entry->nub_info; - packet.GetHexBytes(reg_value.value.v_sint8, reg_entry->nub_info.size, 0xcc); + std::unique_ptr reg_value = + std::make_unique(); + reg_value->info = reg_entry->nub_info; + packet.GetHexBytes(reg_value->value.v_sint8, reg_entry->nub_info.size, 0xcc); nub_thread_t tid = ExtractThreadIDFromThreadSuffix(p); if (tid == INVALID_NUB_THREAD) @@ -4276,7 +4298,8 @@ rnb_err_t RNBRemote::HandlePacket_P(const char *p) { "No thread specified in p packet"); if (!DNBThreadSetRegisterValueByID(pid, tid, reg_entry->nub_info.set, - reg_entry->nub_info.reg, ®_value)) { + reg_entry->nub_info.reg, + reg_value.get())) { return SendErrorPacket("E32"); } return SendPacket("OK"); @@ -5561,7 +5584,8 @@ RNBRemote::GetJSONThreadsInfo(bool threads_with_valid_stop_info_only) { } } - DNBRegisterValue reg_value; + std::unique_ptr reg_value = + std::make_unique(); if (g_reg_entries != NULL) { JSONGenerator::DictionarySP 
registers_dict_sp( @@ -5574,14 +5598,14 @@ RNBRemote::GetJSONThreadsInfo(bool threads_with_valid_stop_info_only) { g_reg_entries[reg].nub_info.value_regs == NULL) { if (!DNBThreadGetRegisterValueByID( pid, tid, g_reg_entries[reg].nub_info.set, - g_reg_entries[reg].nub_info.reg, ®_value)) + g_reg_entries[reg].nub_info.reg, reg_value.get())) continue; std::ostringstream reg_num; reg_num << std::dec << g_reg_entries[reg].debugserver_regnum; // Encode native byte ordered bytes as hex ascii registers_dict_sp->AddBytesAsHexASCIIString( - reg_num.str(), reg_value.value.v_uint8, + reg_num.str(), reg_value->value.v_uint8, g_reg_entries[reg].nub_info.size); } }