diff options
Diffstat (limited to 'pkg/sentry/platform')
26 files changed, 402 insertions, 500 deletions
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go index acad4c793..f8ccb7430 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go @@ -91,6 +91,13 @@ func bluepillSigBus(c *vCPU) { } } +// bluepillHandleEnosys is responsible for handling enosys error. +// +//go:nosplit +func bluepillHandleEnosys(c *vCPU) { + throw("run failed: ENOSYS") +} + // bluepillReadyStopGuest checks whether the current vCPU is ready for interrupt injection. // //go:nosplit @@ -126,3 +133,10 @@ func bluepillReadyStopGuest(c *vCPU) bool { } return true } + +// bluepillArchHandleExit checks architecture specific exitcode. +// +//go:nosplit +func bluepillArchHandleExit(c *vCPU, context unsafe.Pointer) { + c.die(bluepillArchContext(context), "unknown") +} diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go index 965ad66b5..1f09813ba 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64.go @@ -42,6 +42,13 @@ var ( sErrEsr: _ESR_ELx_SERR_NMI, }, } + + // vcpuExtDabt is the event of ext_dabt. + vcpuExtDabt = kvmVcpuEvents{ + exception: exception{ + extDabtPending: 1, + }, + } ) // getTLS returns the value of TPIDR_EL0 register. diff --git a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go index 9433d4da5..4d912769a 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go @@ -85,7 +85,7 @@ func bluepillStopGuest(c *vCPU) { uintptr(c.fd), _KVM_SET_VCPU_EVENTS, uintptr(unsafe.Pointer(&vcpuSErrBounce))); errno != 0 { - throw("sErr injection failed") + throw("bounce sErr injection failed") } } @@ -93,18 +93,54 @@ func bluepillStopGuest(c *vCPU) { // //go:nosplit func bluepillSigBus(c *vCPU) { + // Host must support ARM64_HAS_RAS_EXTN. 
if _, _, errno := syscall.RawSyscall( // escapes: no. syscall.SYS_IOCTL, uintptr(c.fd), _KVM_SET_VCPU_EVENTS, uintptr(unsafe.Pointer(&vcpuSErrNMI))); errno != 0 { - throw("sErr injection failed") + if errno == syscall.EINVAL { + throw("No ARM64_HAS_RAS_EXTN feature in host.") + } + throw("nmi sErr injection failed") } } +// bluepillExtDabt is responsible for injecting external data abort. +// +//go:nosplit +func bluepillExtDabt(c *vCPU) { + if _, _, errno := syscall.RawSyscall( // escapes: no. + syscall.SYS_IOCTL, + uintptr(c.fd), + _KVM_SET_VCPU_EVENTS, + uintptr(unsafe.Pointer(&vcpuExtDabt))); errno != 0 { + throw("ext_dabt injection failed") + } +} + +// bluepillHandleEnosys is responsible for handling enosys error. +// +//go:nosplit +func bluepillHandleEnosys(c *vCPU) { + bluepillExtDabt(c) +} + // bluepillReadyStopGuest checks whether the current vCPU is ready for sError injection. // //go:nosplit func bluepillReadyStopGuest(c *vCPU) bool { return true } + +// bluepillArchHandleExit checks architecture specific exitcode. +// +//go:nosplit +func bluepillArchHandleExit(c *vCPU, context unsafe.Pointer) { + switch c.runData.exitReason { + case _KVM_EXIT_ARM_NISV: + bluepillExtDabt(c) + default: + c.die(bluepillArchContext(context), "unknown") + } +} diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go index 75085ac6a..8c5369377 100644 --- a/pkg/sentry/platform/kvm/bluepill_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go @@ -148,6 +148,9 @@ func bluepillHandler(context unsafe.Pointer) { // mode and have interrupts disabled. bluepillSigBus(c) continue // Rerun vCPU. 
+ case syscall.ENOSYS: + bluepillHandleEnosys(c) + continue default: throw("run failed") } @@ -220,7 +223,7 @@ func bluepillHandler(context unsafe.Pointer) { c.die(bluepillArchContext(context), "entry failed") return default: - c.die(bluepillArchContext(context), "unknown") + bluepillArchHandleExit(c, context) return } } diff --git a/pkg/sentry/platform/kvm/kvm_arm64.go b/pkg/sentry/platform/kvm/kvm_arm64.go index 0b06a923a..9db1db4e9 100644 --- a/pkg/sentry/platform/kvm/kvm_arm64.go +++ b/pkg/sentry/platform/kvm/kvm_arm64.go @@ -47,10 +47,11 @@ type userRegs struct { } type exception struct { - sErrPending uint8 - sErrHasEsr uint8 - pad [6]uint8 - sErrEsr uint64 + sErrPending uint8 + sErrHasEsr uint8 + extDabtPending uint8 + pad [5]uint8 + sErrEsr uint64 } type kvmVcpuEvents struct { diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go index 6abaa21c4..2492d57be 100644 --- a/pkg/sentry/platform/kvm/kvm_const.go +++ b/pkg/sentry/platform/kvm/kvm_const.go @@ -56,6 +56,7 @@ const ( _KVM_EXIT_FAIL_ENTRY = 0x9 _KVM_EXIT_INTERNAL_ERROR = 0x11 _KVM_EXIT_SYSTEM_EVENT = 0x18 + _KVM_EXIT_ARM_NISV = 0x1c ) // KVM capability options. diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go index 54837f20c..aa2d21748 100644 --- a/pkg/sentry/platform/kvm/machine_arm64.go +++ b/pkg/sentry/platform/kvm/machine_arm64.go @@ -54,7 +54,7 @@ func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) { pageTable.Map( usermem.Addr(ring0.KernelStartAddress|pr.virtual), pr.length, - pagetables.MapOpts{AccessType: usermem.AnyAccess}, + pagetables.MapOpts{AccessType: usermem.AnyAccess, Global: true}, pr.physical) return true // Keep iterating. 
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index fd92c3873..a466acf4d 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -79,7 +79,7 @@ func (c *vCPU) initArchState() error { } // tcr_el1 - data = _TCR_TXSZ_VA48 | _TCR_CACHE_FLAGS | _TCR_SHARED | _TCR_TG_FLAGS | _TCR_ASID16 | _TCR_IPS_40BITS | _TCR_A1 + data = _TCR_TXSZ_VA48 | _TCR_CACHE_FLAGS | _TCR_SHARED | _TCR_TG_FLAGS | _TCR_ASID16 | _TCR_IPS_40BITS reg.id = _KVM_ARM64_REGS_TCR_EL1 if err := c.setOneRegister(&reg); err != nil { return err @@ -103,7 +103,7 @@ func (c *vCPU) initArchState() error { c.SetTtbr0Kvm(uintptr(data)) // ttbr1_el1 - data = c.machine.kernel.PageTables.TTBR1_EL1(false, 1) + data = c.machine.kernel.PageTables.TTBR1_EL1(false, 0) reg.id = _KVM_ARM64_REGS_TTBR1_EL1 if err := c.setOneRegister(&reg); err != nil { @@ -235,12 +235,12 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) ttbr0App := switchOpts.PageTables.TTBR0_EL1(false, 0) c.SetTtbr0App(uintptr(ttbr0App)) - // TODO(gvisor.dev/issue/1238): full context-switch supporting for Arm64. + // Full context-switch supporting for Arm64. // The Arm64 user-mode execution state consists of: // x0-x30 // PC, SP, PSTATE // V0-V31: 32 128-bit registers for floating point, and simd - // FPSR + // FPSR, FPCR // TPIDR_EL0, used for TLS appRegs := switchOpts.Registers c.SetAppAddr(ring0.KernelStartAddress | uintptr(unsafe.Pointer(appRegs))) @@ -254,22 +254,30 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) case ring0.Syscall: // Fast path: system call executed. return usermem.NoAccess, nil - case ring0.PageFault: return c.fault(int32(syscall.SIGSEGV), info) case ring0.El0ErrNMI: return c.fault(int32(syscall.SIGBUS), info) - case ring0.Vector(bounce): // ring0.VirtualizationException + case ring0.Vector(bounce): // ring0.VirtualizationException. 
return usermem.NoAccess, platform.ErrContextInterrupt case ring0.El0SyncUndef: return c.fault(int32(syscall.SIGILL), info) - case ring0.El1SyncUndef: + case ring0.El0SyncDbg: *info = arch.SignalInfo{ - Signo: int32(syscall.SIGILL), - Code: 1, // ILL_ILLOPC (illegal opcode). + Signo: int32(syscall.SIGTRAP), + Code: 1, // TRAP_BRKPT (breakpoint). } info.SetAddr(switchOpts.Registers.Pc) // Include address. return usermem.AccessType{}, platform.ErrContextSignal + case ring0.El0SyncSpPc: + *info = arch.SignalInfo{ + Signo: int32(syscall.SIGBUS), + Code: 2, // BUS_ADRERR (physical address does not exist). + } + return usermem.NoAccess, platform.ErrContextSignal + case ring0.El0SyncSys, + ring0.El0SyncWfx: + return usermem.NoAccess, nil // skip for now. default: panic(fmt.Sprintf("unexpected vector: 0x%x", vector)) } diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go index f56aa3b79..571bfcc2e 100644 --- a/pkg/sentry/platform/ptrace/ptrace.go +++ b/pkg/sentry/platform/ptrace/ptrace.go @@ -18,8 +18,8 @@ // // In a nutshell, it works as follows: // -// The creation of a new address space creates a new child processes with a -// single thread which is traced by a single goroutine. +// The creation of a new address space creates a new child process with a single +// thread which is traced by a single goroutine. // // A context is just a collection of temporary variables. Calling Switch on a // context does the following: diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go index 812ab80ef..aacd7ce70 100644 --- a/pkg/sentry/platform/ptrace/subprocess.go +++ b/pkg/sentry/platform/ptrace/subprocess.go @@ -590,7 +590,7 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool { // facilitate vsyscall emulation. See patchSignalInfo. 
patchSignalInfo(regs, &c.signalInfo) return false - } else if c.signalInfo.Code <= 0 && c.signalInfo.Pid() == int32(os.Getpid()) { + } else if c.signalInfo.Code <= 0 && c.signalInfo.PID() == int32(os.Getpid()) { // The signal was generated by this process. That means // that it was an interrupt or something else that we // should bail for. Note that we ignore signals diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD index 679b287c3..2852b7387 100644 --- a/pkg/sentry/platform/ring0/BUILD +++ b/pkg/sentry/platform/ring0/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library") +load("//tools:defs.bzl", "arch_genrule", "go_library") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") package(licenses = ["notice"]) @@ -39,19 +39,19 @@ go_template_instance( template = ":defs_arm64", ) -genrule( +arch_genrule( name = "entry_impl_amd64", srcs = ["entry_amd64.s"], outs = ["entry_impl_amd64.s"], - cmd = "(echo -e '// build +amd64\\n' && $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(SRCS)) > $@", + cmd = "(echo -e '// build +amd64\\n' && QEMU $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(location entry_amd64.s)) > $@", tools = ["//pkg/sentry/platform/ring0/gen_offsets"], ) -genrule( +arch_genrule( name = "entry_impl_arm64", srcs = ["entry_arm64.s"], outs = ["entry_impl_arm64.s"], - cmd = "(echo -e '// build +arm64\\n' && $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(SRCS)) > $@", + cmd = "(echo -e '// build +arm64\\n' && QEMU $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(location entry_arm64.s)) > $@", tools = ["//pkg/sentry/platform/ring0/gen_offsets"], ) @@ -72,7 +72,6 @@ go_library( "lib_amd64.s", "lib_arm64.go", "lib_arm64.s", - "lib_arm64_unsafe.go", "ring0.go", ], visibility = ["//pkg/sentry:internal"], diff --git a/pkg/sentry/platform/ring0/aarch64.go b/pkg/sentry/platform/ring0/aarch64.go index 327d48465..3bda594f9 100644 --- 
a/pkg/sentry/platform/ring0/aarch64.go +++ b/pkg/sentry/platform/ring0/aarch64.go @@ -90,10 +90,12 @@ const ( El0SyncIa El0SyncFpsimdAcc El0SyncSveAcc + El0SyncFpsimdExc El0SyncSys El0SyncSpPc El0SyncUndef El0SyncDbg + El0SyncWfx El0SyncInv El0ErrNMI diff --git a/pkg/sentry/platform/ring0/entry_arm64.s b/pkg/sentry/platform/ring0/entry_arm64.s index f489ad352..b2bb18257 100644 --- a/pkg/sentry/platform/ring0/entry_arm64.s +++ b/pkg/sentry/platform/ring0/entry_arm64.s @@ -132,40 +132,6 @@ MOVD offset+PTRACE_R29(reg), R29; \ MOVD offset+PTRACE_R30(reg), R30; -// NOP-s -#define nop31Instructions() \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; \ - WORD $0xd503201f; - #define ESR_ELx_EC_UNKNOWN (0x00) #define ESR_ELx_EC_WFx (0x01) /* Unallocated EC: 0x02 */ @@ -305,24 +271,32 @@ WORD $0xd538d092; //MRS TPIDR_EL1, R18 // SWITCH_TO_APP_PAGETABLE sets a new pagetable for a container application. 
-#define SWITCH_TO_APP_PAGETABLE(from) \ - MRS TTBR1_EL1, R0; \ - MOVD CPU_APP_ASID(from), R1; \ - BFI $48, R1, $16, R0; \ - MSR R0, TTBR1_EL1; \ // set the ASID in TTBR1_EL1 (since TCR.A1 is set) - ISB $15; \ - MOVD CPU_TTBR0_APP(from), RSV_REG; \ - MSR RSV_REG, TTBR0_EL1; +#define SWITCH_TO_APP_PAGETABLE() \ + MOVD CPU_APP_ASID(RSV_REG), RSV_REG_APP; \ + MOVD CPU_TTBR0_APP(RSV_REG), RSV_REG; \ + BFI $48, RSV_REG_APP, $16, RSV_REG; \ + MSR RSV_REG, TTBR0_EL1; \ + ISB $15; // SWITCH_TO_KVM_PAGETABLE sets the kvm pagetable. -#define SWITCH_TO_KVM_PAGETABLE(from) \ - MRS TTBR1_EL1, R0; \ - MOVD $1, R1; \ - BFI $48, R1, $16, R0; \ - MSR R0, TTBR1_EL1; \ - ISB $15; \ - MOVD CPU_TTBR0_KVM(from), RSV_REG; \ - MSR RSV_REG, TTBR0_EL1; +#define SWITCH_TO_KVM_PAGETABLE() \ + MOVD CPU_TTBR0_KVM(RSV_REG), RSV_REG; \ + MOVD $1, RSV_REG_APP; \ + BFI $48, RSV_REG_APP, $16, RSV_REG; \ + MSR RSV_REG, TTBR0_EL1; \ + ISB $15; + +TEXT ·EnableVFP(SB),NOSPLIT,$0 + MOVD $FPEN_ENABLE, R0 + WORD $0xd5181040 //MSR R0, CPACR_EL1 + ISB $15 + RET + +TEXT ·DisableVFP(SB),NOSPLIT,$0 + MOVD $0, R0 + WORD $0xd5181040 //MSR R0, CPACR_EL1 + ISB $15 + RET #define VFP_ENABLE \ MOVD $FPEN_ENABLE, R0; \ @@ -370,12 +344,12 @@ MOVD R4, CPU_REGISTERS+PTRACE_SP(RSV_REG); \ LOAD_KERNEL_STACK(RSV_REG); // Load the temporary stack. -// EXCEPTION_WITH_ERROR is a common exception handler function. -#define EXCEPTION_WITH_ERROR(user, vector) \ +// EXCEPTION_EL0 is a common el0 exception handler function. +#define EXCEPTION_EL0(vector) \ WORD $0xd538d092; \ //MRS TPIDR_EL1, R18 WORD $0xd538601a; \ //MRS FAR_EL1, R26 MOVD R26, CPU_FAULT_ADDR(RSV_REG); \ - MOVD $user, R3; \ + MOVD $1, R3; \ MOVD R3, CPU_ERROR_TYPE(RSV_REG); \ // Set error type to user. MOVD $vector, R3; \ MOVD R3, CPU_VECTOR_CODE(RSV_REG); \ @@ -383,6 +357,12 @@ MOVD R3, CPU_ERROR_CODE(RSV_REG); \ B ·kernelExitToEl1(SB); +// EXCEPTION_EL1 is a common el1 exception handler function. 
+#define EXCEPTION_EL1(vector) \ + MOVD $vector, R3; \ + MOVD R3, 8(RSP); \ + B ·HaltEl1ExceptionAndResume(SB); + // storeAppASID writes the application's asid value. TEXT ·storeAppASID(SB),NOSPLIT,$0-8 MOVD asid+0(FP), R1 @@ -430,6 +410,16 @@ TEXT ·HaltEl1SvcAndResume(SB),NOSPLIT,$0 CALL ·kernelSyscall(SB) // Call the trampoline. B ·kernelExitToEl1(SB) // Resume. +// HaltEl1ExceptionAndResume calls Hooks.KernelException and resume. +TEXT ·HaltEl1ExceptionAndResume(SB),NOSPLIT,$0-8 + WORD $0xd538d092 // MRS TPIDR_EL1, R18 + MOVD CPU_SELF(RSV_REG), R3 // Load vCPU. + MOVD R3, 8(RSP) // First argument (vCPU). + MOVD vector+0(FP), R3 + MOVD R3, 16(RSP) // Second argument (vector). + CALL ·kernelException(SB) // Call the trampoline. + B ·kernelExitToEl1(SB) // Resume. + // Shutdown stops the guest. TEXT ·Shutdown(SB),NOSPLIT,$0 // PSCI EVENT. @@ -502,7 +492,7 @@ do_exit_to_el0: WORD $0xd538d092 //MRS TPIDR_EL1, R18 - SWITCH_TO_APP_PAGETABLE(RSV_REG) + SWITCH_TO_APP_PAGETABLE() LDP 16*1(RSP), (R0, R1) LDP 16*0(RSP), (RSV_REG, RSV_REG_APP) @@ -527,10 +517,10 @@ TEXT ·kernelExitToEl1(SB),NOSPLIT,$0 MOVD CPU_REGISTERS+PTRACE_SP(RSV_REG), R1 MOVD R1, RSP - SWITCH_TO_KVM_PAGETABLE(RSV_REG) + REGISTERS_LOAD(RSV_REG, CPU_REGISTERS) + SWITCH_TO_KVM_PAGETABLE() MRS TPIDR_EL1, RSV_REG - REGISTERS_LOAD(RSV_REG, CPU_REGISTERS) MOVD CPU_REGISTERS+PTRACE_R9(RSV_REG), RSV_REG_APP ERET() @@ -538,8 +528,16 @@ TEXT ·kernelExitToEl1(SB),NOSPLIT,$0 // Start is the CPU entrypoint. TEXT ·Start(SB),NOSPLIT,$0 // Init. - MOVD $SCTLR_EL1_DEFAULT, R1 - MSR R1, SCTLR_EL1 + WORD $0xd508871f // __tlbi(vmalle1) + DSB $7 // dsb(nsh) + + MOVD $1<<12, R1 // Reset mdscr_el1 and disable + MSR R1, MDSCR_EL1 // access to the DCC from EL0 + ISB $15 + + MRS TTBR1_EL1, R1 + MSR R1, TTBR0_EL1 + ISB $15 MOVD $CNTKCTL_EL1_DEFAULT, R1 MSR R1, CNTKCTL_EL1 @@ -548,6 +546,15 @@ TEXT ·Start(SB),NOSPLIT,$0 ORR $0xffff000000000000, RSV_REG, RSV_REG WORD $0xd518d092 //MSR R18, TPIDR_EL1 + // Init. 
+ MOVD $SCTLR_EL1_DEFAULT, R1 // re-enable the mmu. + MSR R1, SCTLR_EL1 + ISB $15 + WORD $0xd508751f // ic iallu + + DSB $7 // dsb(nsh) + ISB $15 + B ·kernelExitToEl1(SB) // El1_sync_invalid is the handler for an invalid EL1_sync. @@ -569,62 +576,46 @@ TEXT ·El1_error_invalid(SB),NOSPLIT,$0 // El1_sync is the handler for El1_sync. TEXT ·El1_sync(SB),NOSPLIT,$0 KERNEL_ENTRY_FROM_EL1 - WORD $0xd5385219 // MRS ESR_EL1, R25 - LSR $ESR_ELx_EC_SHIFT, R25, R24 + MRS ESR_EL1, R25 // read the syndrome register + LSR $ESR_ELx_EC_SHIFT, R25, R24 // exception class CMP $ESR_ELx_EC_DABT_CUR, R24 - BEQ el1_da + BEQ el1_da // data abort in EL1 CMP $ESR_ELx_EC_IABT_CUR, R24 - BEQ el1_ia - CMP $ESR_ELx_EC_SYS64, R24 - BEQ el1_undef + BEQ el1_ia // instruction abort in EL1 CMP $ESR_ELx_EC_SP_ALIGN, R24 - BEQ el1_sp_pc + BEQ el1_sp_pc // stack alignment exception CMP $ESR_ELx_EC_PC_ALIGN, R24 - BEQ el1_sp_pc + BEQ el1_sp_pc // pc alignment exception CMP $ESR_ELx_EC_UNKNOWN, R24 - BEQ el1_undef + BEQ el1_undef // unknown exception in EL1 CMP $ESR_ELx_EC_SVC64, R24 - BEQ el1_svc + BEQ el1_svc // SVC in 64-bit state CMP $ESR_ELx_EC_BREAKPT_CUR, R24 - BGE el1_dbg + BEQ el1_dbg // debug exception in EL1 CMP $ESR_ELx_EC_FP_ASIMD, R24 - BEQ el1_fpsimd_acc + BEQ el1_fpsimd_acc // FP/ASIMD access + CMP $ESR_ELx_EC_SVE, R24 + BEQ el1_sve_acc // SVE access B el1_invalid el1_da: + EXCEPTION_EL1(El1SyncDa) el1_ia: - WORD $0xd538d092 //MRS TPIDR_EL1, R18 - WORD $0xd538601a //MRS FAR_EL1, R26 - - MOVD R26, CPU_FAULT_ADDR(RSV_REG) - - MOVD $0, CPU_ERROR_TYPE(RSV_REG) - - MOVD $PageFault, R3 - MOVD R3, CPU_VECTOR_CODE(RSV_REG) - - B ·HaltAndResume(SB) - + EXCEPTION_EL1(El1SyncIa) el1_sp_pc: - B ·Shutdown(SB) - + EXCEPTION_EL1(El1SyncSpPc) el1_undef: - B ·Shutdown(SB) - + EXCEPTION_EL1(El1SyncUndef) el1_svc: - MOVD $0, CPU_ERROR_CODE(RSV_REG) - MOVD $0, CPU_ERROR_TYPE(RSV_REG) B ·HaltEl1SvcAndResume(SB) - el1_dbg: - B ·Shutdown(SB) - + EXCEPTION_EL1(El1SyncDbg) el1_fpsimd_acc: +el1_sve_acc: VFP_ENABLE 
B ·kernelExitToEl1(SB) // Resume. - el1_invalid: - B ·Shutdown(SB) + EXCEPTION_EL1(El1SyncInv) // El1_irq is the handler for El1_irq. TEXT ·El1_irq(SB),NOSPLIT,$0 @@ -641,28 +632,32 @@ TEXT ·El1_error(SB),NOSPLIT,$0 // El0_sync is the handler for El0_sync. TEXT ·El0_sync(SB),NOSPLIT,$0 KERNEL_ENTRY_FROM_EL0 - WORD $0xd5385219 // MRS ESR_EL1, R25 - LSR $ESR_ELx_EC_SHIFT, R25, R24 + MRS ESR_EL1, R25 // read the syndrome register + LSR $ESR_ELx_EC_SHIFT, R25, R24 // exception class CMP $ESR_ELx_EC_SVC64, R24 - BEQ el0_svc + BEQ el0_svc // SVC in 64-bit state CMP $ESR_ELx_EC_DABT_LOW, R24 - BEQ el0_da + BEQ el0_da // data abort in EL0 CMP $ESR_ELx_EC_IABT_LOW, R24 - BEQ el0_ia + BEQ el0_ia // instruction abort in EL0 CMP $ESR_ELx_EC_FP_ASIMD, R24 - BEQ el0_fpsimd_acc + BEQ el0_fpsimd_acc // FP/ASIMD access CMP $ESR_ELx_EC_SVE, R24 - BEQ el0_sve_acc + BEQ el0_sve_acc // SVE access CMP $ESR_ELx_EC_FP_EXC64, R24 - BEQ el0_fpsimd_exc + BEQ el0_fpsimd_exc // FP/ASIMD exception CMP $ESR_ELx_EC_SP_ALIGN, R24 - BEQ el0_sp_pc + BEQ el0_sp_pc // stack alignment exception CMP $ESR_ELx_EC_PC_ALIGN, R24 - BEQ el0_sp_pc + BEQ el0_sp_pc // pc alignment exception CMP $ESR_ELx_EC_UNKNOWN, R24 - BEQ el0_undef + BEQ el0_undef // unknown exception in EL0 CMP $ESR_ELx_EC_BREAKPT_LOW, R24 - BGE el0_dbg + BEQ el0_dbg // debug exception in EL0 + CMP $ESR_ELx_EC_SYS64, R24 + BEQ el0_sys // configurable trap + CMP $ESR_ELx_EC_WFx, R24 + BEQ el0_wfx // WFX trap B el0_invalid el0_svc: @@ -680,28 +675,25 @@ el0_svc: el0_da: el0_ia: - EXCEPTION_WITH_ERROR(1, PageFault) - + EXCEPTION_EL0(PageFault) el0_fpsimd_acc: - B ·Shutdown(SB) - + EXCEPTION_EL0(El0SyncFpsimdAcc) el0_sve_acc: - B ·Shutdown(SB) - + EXCEPTION_EL0(El0SyncSveAcc) el0_fpsimd_exc: - B ·Shutdown(SB) - + EXCEPTION_EL0(El0SyncFpsimdExc) el0_sp_pc: - B ·Shutdown(SB) - + EXCEPTION_EL0(El0SyncSpPc) el0_undef: - EXCEPTION_WITH_ERROR(1, El0SyncUndef) - + EXCEPTION_EL0(El0SyncUndef) el0_dbg: - B ·Shutdown(SB) - + EXCEPTION_EL0(El0SyncDbg) 
+el0_sys: + EXCEPTION_EL0(El0SyncSys) +el0_wfx: + EXCEPTION_EL0(El0SyncWfx) el0_invalid: - B ·Shutdown(SB) + EXCEPTION_EL0(El0SyncInv) TEXT ·El0_irq(SB),NOSPLIT,$0 B ·Shutdown(SB) @@ -716,36 +708,11 @@ TEXT ·El0_error(SB),NOSPLIT,$0 CMP $ESR_ELx_SERR_NMI, R24 BEQ el0_nmi B el0_bounce -el0_nmi: - WORD $0xd538d092 //MRS TPIDR_EL1, R18 - WORD $0xd538601a //MRS FAR_EL1, R26 - - MOVD R26, CPU_FAULT_ADDR(RSV_REG) - - MOVD $1, R3 - MOVD R3, CPU_ERROR_TYPE(RSV_REG) // Set error type to user. - - MOVD $El0ErrNMI, R3 - MOVD R3, CPU_VECTOR_CODE(RSV_REG) - - MRS ESR_EL1, R3 - MOVD R3, CPU_ERROR_CODE(RSV_REG) - - B ·kernelExitToEl1(SB) +el0_nmi: + EXCEPTION_EL0(El0ErrNMI) el0_bounce: - WORD $0xd538d092 //MRS TPIDR_EL1, R18 - WORD $0xd538601a //MRS FAR_EL1, R26 - - MOVD R26, CPU_FAULT_ADDR(RSV_REG) - - MOVD $1, R3 - MOVD R3, CPU_ERROR_TYPE(RSV_REG) // Set error type to user. - - MOVD $VirtualizationException, R3 - MOVD R3, CPU_VECTOR_CODE(RSV_REG) - - B ·kernelExitToEl1(SB) + EXCEPTION_EL0(VirtualizationException) TEXT ·El0_sync_invalid(SB),NOSPLIT,$0 B ·Shutdown(SB) @@ -760,79 +727,43 @@ TEXT ·El0_error_invalid(SB),NOSPLIT,$0 B ·Shutdown(SB) // Vectors implements exception vector table. +// The start address of exception vector table should be 11-bits aligned. 
+// For detail, please refer to arm developer document: +// https://developer.arm.com/documentation/100933/0100/AArch64-exception-vector-table +// Also can refer to the code in linux kernel: arch/arm64/kernel/entry.S TEXT ·Vectors(SB),NOSPLIT,$0 + PCALIGN $2048 B ·El1_sync_invalid(SB) - nop31Instructions() + PCALIGN $128 B ·El1_irq_invalid(SB) - nop31Instructions() + PCALIGN $128 B ·El1_fiq_invalid(SB) - nop31Instructions() + PCALIGN $128 B ·El1_error_invalid(SB) - nop31Instructions() + PCALIGN $128 B ·El1_sync(SB) - nop31Instructions() + PCALIGN $128 B ·El1_irq(SB) - nop31Instructions() + PCALIGN $128 B ·El1_fiq(SB) - nop31Instructions() + PCALIGN $128 B ·El1_error(SB) - nop31Instructions() + PCALIGN $128 B ·El0_sync(SB) - nop31Instructions() + PCALIGN $128 B ·El0_irq(SB) - nop31Instructions() + PCALIGN $128 B ·El0_fiq(SB) - nop31Instructions() + PCALIGN $128 B ·El0_error(SB) - nop31Instructions() + PCALIGN $128 B ·El0_sync_invalid(SB) - nop31Instructions() + PCALIGN $128 B ·El0_irq_invalid(SB) - nop31Instructions() + PCALIGN $128 B ·El0_fiq_invalid(SB) - nop31Instructions() + PCALIGN $128 B ·El0_error_invalid(SB) - nop31Instructions() - - // The exception-vector-table is required to be 11-bits aligned. - // Please see Linux source code as reference: arch/arm64/kernel/entry.s. - // For gvisor, I defined it as 4K in length, filled the 2nd 2K part with NOPs. - // So that, I can safely move the 1st 2K part into the address with 11-bits alignment. 
- WORD $0xd503201f //nop - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() - WORD $0xd503201f - nop31Instructions() diff --git a/pkg/sentry/platform/ring0/gen_offsets/BUILD b/pkg/sentry/platform/ring0/gen_offsets/BUILD index 9742308d8..a9703baf6 100644 --- a/pkg/sentry/platform/ring0/gen_offsets/BUILD +++ b/pkg/sentry/platform/ring0/gen_offsets/BUILD @@ -24,6 +24,9 @@ go_binary( "defs_impl_arm64.go", "main.go", ], + # Use the libc malloc to avoid any extra dependencies. This is required to + # pass the sentry deps test. + system_malloc = True, visibility = [ "//pkg/sentry/platform/kvm:__pkg__", "//pkg/sentry/platform/ring0:__pkg__", diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go index b55dc29b3..36a60700e 100644 --- a/pkg/sentry/platform/ring0/kernel_amd64.go +++ b/pkg/sentry/platform/ring0/kernel_amd64.go @@ -65,6 +65,7 @@ func (k *Kernel) init(maxCPUs int) { } } +// EntryRegions returns the set of kernel entry regions (must be mapped). 
func (k *Kernel) EntryRegions() map[uintptr]uintptr { regions := make(map[uintptr]uintptr) diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/sentry/platform/ring0/kernel_arm64.go index 6cbbf001f..c05284641 100644 --- a/pkg/sentry/platform/ring0/kernel_arm64.go +++ b/pkg/sentry/platform/ring0/kernel_arm64.go @@ -24,6 +24,10 @@ func HaltAndResume() //go:nosplit func HaltEl1SvcAndResume() +// HaltEl1ExceptionAndResume calls Hooks.KernelException and resume. +//go:nosplit +func HaltEl1ExceptionAndResume() + // init initializes architecture-specific state. func (k *Kernel) init(maxCPUs int) { } @@ -49,11 +53,17 @@ func IsCanonical(addr uint64) bool { return addr <= 0x0000ffffffffffff || addr > 0xffff000000000000 } +// SwitchToUser performs an eret. +// +// The return value is the exception vector. +// +// +checkescape:all +// //go:nosplit func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { storeAppASID(uintptr(switchOpts.UserASID)) if switchOpts.Flush { - FlushTlbAll() + FlushTlbByASID(uintptr(switchOpts.UserASID)) } regs := switchOpts.Registers @@ -61,11 +71,13 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { regs.Pstate &= ^uint64(PsrFlagsClear) regs.Pstate |= UserFlagsSet + EnableVFP() LoadFloatingPoint(switchOpts.FloatingPointState) kernelExitToEl0() SaveFloatingPoint(switchOpts.FloatingPointState) + DisableVFP() vector = c.vecCode diff --git a/pkg/sentry/platform/ring0/lib_arm64.go b/pkg/sentry/platform/ring0/lib_arm64.go index d91a09de1..a490bf3af 100644 --- a/pkg/sentry/platform/ring0/lib_arm64.go +++ b/pkg/sentry/platform/ring0/lib_arm64.go @@ -22,19 +22,25 @@ func storeAppASID(asid uintptr) // LocalFlushTlbAll same as FlushTlbAll, but only applies to the calling CPU. func LocalFlushTlbAll() -// FlushTlbAll flush all tlb. +// FlushTlbByVA invalidates tlb by VA/Last-level/Inner-Shareable. +func FlushTlbByVA(addr uintptr) + +// FlushTlbByASID invalidates tlb by ASID/Inner-Shareable. 
+func FlushTlbByASID(asid uintptr) + +// FlushTlbAll invalidates all tlb. func FlushTlbAll() // CPACREL1 returns the value of the CPACR_EL1 register. func CPACREL1() (value uintptr) -// FPCR returns the value of FPCR register. +// GetFPCR returns the value of FPCR register. func GetFPCR() (value uintptr) // SetFPCR writes the FPCR value. func SetFPCR(value uintptr) -// FPSR returns the value of FPSR register. +// GetFPSR returns the value of FPSR register. func GetFPSR() (value uintptr) // SetFPSR writes the FPSR value. @@ -53,9 +59,13 @@ func LoadFloatingPoint(*byte) // SaveFloatingPoint saves floating point state. func SaveFloatingPoint(*byte) +// EnableVFP enables fpsimd. +func EnableVFP() + +// DisableVFP disables fpsimd. +func DisableVFP() + // Init sets function pointers based on architectural features. // // This must be called prior to using ring0. -func Init() { - rewriteVectors() -} +func Init() {} diff --git a/pkg/sentry/platform/ring0/lib_arm64.s b/pkg/sentry/platform/ring0/lib_arm64.s index 19c1fca8b..e39b32841 100644 --- a/pkg/sentry/platform/ring0/lib_arm64.s +++ b/pkg/sentry/platform/ring0/lib_arm64.s @@ -15,6 +15,23 @@ #include "funcdata.h" #include "textflag.h" +#define TLBI_ASID_SHIFT 48 + +TEXT ·FlushTlbByVA(SB),NOSPLIT,$0-8 + MOVD addr+0(FP), R1 + DSB $10 // dsb(ishst) + WORD $0xd50883a1 // tlbi vale1is, x1 + DSB $11 // dsb(ish) + RET + +TEXT ·FlushTlbByASID(SB),NOSPLIT,$0-8 + MOVD asid+0(FP), R1 + LSL $TLBI_ASID_SHIFT, R1, R1 + DSB $10 // dsb(ishst) + WORD $0xd5088341 // tlbi aside1is, x1 + DSB $11 // dsb(ish) + RET + TEXT ·LocalFlushTlbAll(SB),NOSPLIT,$0 DSB $6 // dsb(nshst) WORD $0xd508871f // __tlbi(vmalle1) @@ -35,62 +52,47 @@ TEXT ·CPACREL1(SB),NOSPLIT,$0-8 RET TEXT ·GetFPCR(SB),NOSPLIT,$0-8 - WORD $0xd53b4201 // MRS NZCV, R1 + MOVD FPCR, R1 MOVD R1, ret+0(FP) RET TEXT ·GetFPSR(SB),NOSPLIT,$0-8 - WORD $0xd53b4421 // MRS FPSR, R1 + MOVD FPSR, R1 MOVD R1, ret+0(FP) RET TEXT ·SetFPCR(SB),NOSPLIT,$0-8 MOVD addr+0(FP), R1 - WORD $0xd51b4201 // 
MSR R1, NZCV + MOVD R1, FPCR RET TEXT ·SetFPSR(SB),NOSPLIT,$0-8 MOVD addr+0(FP), R1 - WORD $0xd51b4421 // MSR R1, FPSR + MOVD R1, FPSR RET TEXT ·SaveVRegs(SB),NOSPLIT,$0-8 MOVD addr+0(FP), R0 // Skip aarch64_ctx, fpsr, fpcr. - FMOVD F0, 16*1(R0) - FMOVD F1, 16*2(R0) - FMOVD F2, 16*3(R0) - FMOVD F3, 16*4(R0) - FMOVD F4, 16*5(R0) - FMOVD F5, 16*6(R0) - FMOVD F6, 16*7(R0) - FMOVD F7, 16*8(R0) - FMOVD F8, 16*9(R0) - FMOVD F9, 16*10(R0) - FMOVD F10, 16*11(R0) - FMOVD F11, 16*12(R0) - FMOVD F12, 16*13(R0) - FMOVD F13, 16*14(R0) - FMOVD F14, 16*15(R0) - FMOVD F15, 16*16(R0) - FMOVD F16, 16*17(R0) - FMOVD F17, 16*18(R0) - FMOVD F18, 16*19(R0) - FMOVD F19, 16*20(R0) - FMOVD F20, 16*21(R0) - FMOVD F21, 16*22(R0) - FMOVD F22, 16*23(R0) - FMOVD F23, 16*24(R0) - FMOVD F24, 16*25(R0) - FMOVD F25, 16*26(R0) - FMOVD F26, 16*27(R0) - FMOVD F27, 16*28(R0) - FMOVD F28, 16*29(R0) - FMOVD F29, 16*30(R0) - FMOVD F30, 16*31(R0) - FMOVD F31, 16*32(R0) - ISB $15 + ADD $16, R0, R0 + + WORD $0xad000400 // stp q0, q1, [x0] + WORD $0xad010c02 // stp q2, q3, [x0, #32] + WORD $0xad021404 // stp q4, q5, [x0, #64] + WORD $0xad031c06 // stp q6, q7, [x0, #96] + WORD $0xad042408 // stp q8, q9, [x0, #128] + WORD $0xad052c0a // stp q10, q11, [x0, #160] + WORD $0xad06340c // stp q12, q13, [x0, #192] + WORD $0xad073c0e // stp q14, q15, [x0, #224] + WORD $0xad084410 // stp q16, q17, [x0, #256] + WORD $0xad094c12 // stp q18, q19, [x0, #288] + WORD $0xad0a5414 // stp q20, q21, [x0, #320] + WORD $0xad0b5c16 // stp q22, q23, [x0, #352] + WORD $0xad0c6418 // stp q24, q25, [x0, #384] + WORD $0xad0d6c1a // stp q26, q27, [x0, #416] + WORD $0xad0e741c // stp q28, q29, [x0, #448] + WORD $0xad0f7c1e // stp q30, q31, [x0, #480] RET @@ -98,39 +100,24 @@ TEXT ·LoadVRegs(SB),NOSPLIT,$0-8 MOVD addr+0(FP), R0 // Skip aarch64_ctx, fpsr, fpcr. 
- FMOVD 16*1(R0), F0 - FMOVD 16*2(R0), F1 - FMOVD 16*3(R0), F2 - FMOVD 16*4(R0), F3 - FMOVD 16*5(R0), F4 - FMOVD 16*6(R0), F5 - FMOVD 16*7(R0), F6 - FMOVD 16*8(R0), F7 - FMOVD 16*9(R0), F8 - FMOVD 16*10(R0), F9 - FMOVD 16*11(R0), F10 - FMOVD 16*12(R0), F11 - FMOVD 16*13(R0), F12 - FMOVD 16*14(R0), F13 - FMOVD 16*15(R0), F14 - FMOVD 16*16(R0), F15 - FMOVD 16*17(R0), F16 - FMOVD 16*18(R0), F17 - FMOVD 16*19(R0), F18 - FMOVD 16*20(R0), F19 - FMOVD 16*21(R0), F20 - FMOVD 16*22(R0), F21 - FMOVD 16*23(R0), F22 - FMOVD 16*24(R0), F23 - FMOVD 16*25(R0), F24 - FMOVD 16*26(R0), F25 - FMOVD 16*27(R0), F26 - FMOVD 16*28(R0), F27 - FMOVD 16*29(R0), F28 - FMOVD 16*30(R0), F29 - FMOVD 16*31(R0), F30 - FMOVD 16*32(R0), F31 - ISB $15 + ADD $16, R0, R0 + + WORD $0xad400400 // ldp q0, q1, [x0] + WORD $0xad410c02 // ldp q2, q3, [x0, #32] + WORD $0xad421404 // ldp q4, q5, [x0, #64] + WORD $0xad431c06 // ldp q6, q7, [x0, #96] + WORD $0xad442408 // ldp q8, q9, [x0, #128] + WORD $0xad452c0a // ldp q10, q11, [x0, #160] + WORD $0xad46340c // ldp q12, q13, [x0, #192] + WORD $0xad473c0e // ldp q14, q15, [x0, #224] + WORD $0xad484410 // ldp q16, q17, [x0, #256] + WORD $0xad494c12 // ldp q18, q19, [x0, #288] + WORD $0xad4a5414 // ldp q20, q21, [x0, #320] + WORD $0xad4b5c16 // ldp q22, q23, [x0, #352] + WORD $0xad4c6418 // ldp q24, q25, [x0, #384] + WORD $0xad4d6c1a // ldp q26, q27, [x0, #416] + WORD $0xad4e741c // ldp q28, q29, [x0, #448] + WORD $0xad4f7c1e // ldp q30, q31, [x0, #480] RET diff --git a/pkg/sentry/platform/ring0/lib_arm64_unsafe.go b/pkg/sentry/platform/ring0/lib_arm64_unsafe.go deleted file mode 100644 index c05166fea..000000000 --- a/pkg/sentry/platform/ring0/lib_arm64_unsafe.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build arm64 - -package ring0 - -import ( - "reflect" - "syscall" - "unsafe" - - "gvisor.dev/gvisor/pkg/safecopy" - "gvisor.dev/gvisor/pkg/usermem" -) - -const ( - nopInstruction = 0xd503201f - instSize = unsafe.Sizeof(uint32(0)) - vectorsRawLen = 0x800 -) - -func unsafeSlice(addr uintptr, length int) (slice []uint32) { - hdr := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) - hdr.Data = addr - hdr.Len = length / int(instSize) - hdr.Cap = length / int(instSize) - return slice -} - -// Work around: move ring0.Vectors() into a specific address with 11-bits alignment. -// -// According to the design documentation of Arm64, -// the start address of exception vector table should be 11-bits aligned. -// Please see the code in linux kernel as reference: arch/arm64/kernel/entry.S -// But, we can't align a function's start address to a specific address by using golang. -// We have raised this question in golang community: -// https://groups.google.com/forum/m/#!topic/golang-dev/RPj90l5x86I -// This function will be removed when golang supports this feature. -// -// There are 2 jobs were implemented in this function: -// 1, move the start address of exception vector table into the specific address. -// 2, modify the offset of each instruction. -func rewriteVectors() { - vectorsBegin := reflect.ValueOf(Vectors).Pointer() - - // The exception-vector-table is required to be 11-bits aligned. - // And the size is 0x800. 
- // Please see the documentation as reference: - // https://developer.arm.com/docs/100933/0100/aarch64-exception-vector-table - // - // But, golang does not allow to set a function's address to a specific value. - // So, for gvisor, I defined the size of exception-vector-table as 4K, - // filled the 2nd 2K part with NOP-s. - // So that, I can safely move the 1st 2K part into the address with 11-bits alignment. - // - // So, the prerequisite for this function to work correctly is: - // vectorsSafeLen >= 0x1000 - // vectorsRawLen = 0x800 - vectorsSafeLen := int(safecopy.FindEndAddress(vectorsBegin) - vectorsBegin) - if vectorsSafeLen < 2*vectorsRawLen { - panic("Can't update vectors") - } - - vectorsSafeTable := unsafeSlice(vectorsBegin, vectorsSafeLen) // Now a []uint32 - vectorsRawLen32 := vectorsRawLen / int(instSize) - - offset := vectorsBegin & (1<<11 - 1) - if offset != 0 { - offset = 1<<11 - offset - } - - pageBegin := (vectorsBegin + offset) & ^uintptr(usermem.PageSize-1) - - _, _, errno := syscall.Syscall(syscall.SYS_MPROTECT, uintptr(pageBegin), uintptr(usermem.PageSize), uintptr(syscall.PROT_READ|syscall.PROT_WRITE|syscall.PROT_EXEC)) - if errno != 0 { - panic(errno.Error()) - } - - offset = offset / instSize // By index, not bytes. - // Move exception-vector-table into the specific address, should uses memmove here. - for i := 1; i <= vectorsRawLen32; i++ { - vectorsSafeTable[int(offset)+vectorsRawLen32-i] = vectorsSafeTable[vectorsRawLen32-i] - } - - // Adjust branch since instruction was moved forward. 
- for i := 0; i < vectorsRawLen32; i++ { - if vectorsSafeTable[int(offset)+i] != nopInstruction { - vectorsSafeTable[int(offset)+i] -= uint32(offset) - } - } - - _, _, errno = syscall.Syscall(syscall.SYS_MPROTECT, uintptr(pageBegin), uintptr(usermem.PageSize), uintptr(syscall.PROT_READ|syscall.PROT_EXEC)) - if errno != 0 { - panic(errno.Error()) - } -} diff --git a/pkg/sentry/platform/ring0/offsets_arm64.go b/pkg/sentry/platform/ring0/offsets_arm64.go index 53bc3353c..164db6d5a 100644 --- a/pkg/sentry/platform/ring0/offsets_arm64.go +++ b/pkg/sentry/platform/ring0/offsets_arm64.go @@ -70,10 +70,12 @@ func Emit(w io.Writer) { fmt.Fprintf(w, "#define El0SyncIa 0x%02x\n", El0SyncIa) fmt.Fprintf(w, "#define El0SyncFpsimdAcc 0x%02x\n", El0SyncFpsimdAcc) fmt.Fprintf(w, "#define El0SyncSveAcc 0x%02x\n", El0SyncSveAcc) + fmt.Fprintf(w, "#define El0SyncFpsimdExc 0x%02x\n", El0SyncFpsimdExc) fmt.Fprintf(w, "#define El0SyncSys 0x%02x\n", El0SyncSys) fmt.Fprintf(w, "#define El0SyncSpPc 0x%02x\n", El0SyncSpPc) fmt.Fprintf(w, "#define El0SyncUndef 0x%02x\n", El0SyncUndef) fmt.Fprintf(w, "#define El0SyncDbg 0x%02x\n", El0SyncDbg) + fmt.Fprintf(w, "#define El0SyncWfx 0x%02x\n", El0SyncWfx) fmt.Fprintf(w, "#define El0SyncInv 0x%02x\n", El0SyncInv) fmt.Fprintf(w, "#define El0ErrNMI 0x%02x\n", El0ErrNMI) diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index 16d5f478b..9e3539e4c 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -1,74 +1,38 @@ -load("//tools:defs.bzl", "go_library", "go_test", "select_arch") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") package(licenses = ["notice"]) -go_template( - name = "generic_walker", - srcs = select_arch( - amd64 = ["walker_amd64.go"], - arm64 = ["walker_arm64.go"], - ), - opt_types = [ - "Visitor", - ], - visibility = [":__pkg__"], -) - 
-go_template_instance( - name = "walker_map", - out = "walker_map.go", - package = "pagetables", - prefix = "map", - template = ":generic_walker", - types = { - "Visitor": "mapVisitor", - }, -) - -go_template_instance( - name = "walker_unmap", - out = "walker_unmap.go", - package = "pagetables", - prefix = "unmap", - template = ":generic_walker", - types = { - "Visitor": "unmapVisitor", - }, -) +[ + # These files are tagged with relevant build architectures. We can always + # build all the input files, which will be included only in the relevant + # architecture builds. + go_template( + name = "generic_walker_%s" % arch, + srcs = ["walker_%s.go" % arch], + opt_types = [ + "Visitor", + ], + visibility = [":__pkg__"], + ) + for arch in ("amd64", "arm64") +] -go_template_instance( - name = "walker_lookup", - out = "walker_lookup.go", - package = "pagetables", - prefix = "lookup", - template = ":generic_walker", - types = { - "Visitor": "lookupVisitor", - }, -) - -go_template_instance( - name = "walker_empty", - out = "walker_empty.go", - package = "pagetables", - prefix = "empty", - template = ":generic_walker", - types = { - "Visitor": "emptyVisitor", - }, -) - -go_template_instance( - name = "walker_check", - out = "walker_check.go", - package = "pagetables", - prefix = "check", - template = ":generic_walker", - types = { - "Visitor": "checkVisitor", - }, -) +[ + # See above. 
+ go_template_instance( + name = "walker_%s_%s" % (op, arch), + out = "walker_%s_%s.go" % (op, arch), + package = "pagetables", + prefix = op, + template = ":generic_walker_%s" % arch, + types = { + "Visitor": "%sVisitor" % op, + }, + ) + for op in ("map", "unmap", "lookup", "empty", "check") + for arch in ("amd64", "arm64") +] go_library( name = "pagetables", @@ -86,10 +50,14 @@ go_library( "pcids_x86.go", "walker_amd64.go", "walker_arm64.go", - "walker_empty.go", - "walker_lookup.go", - "walker_map.go", - "walker_unmap.go", + ":walker_empty_amd64", + ":walker_empty_arm64", + ":walker_lookup_amd64", + ":walker_lookup_arm64", + ":walker_map_amd64", + ":walker_map_arm64", + ":walker_unmap_amd64", + ":walker_unmap_arm64", ], visibility = [ "//pkg/sentry/platform/kvm:__subpackages__", @@ -108,7 +76,8 @@ go_test( "pagetables_amd64_test.go", "pagetables_arm64_test.go", "pagetables_test.go", - "walker_check.go", + ":walker_check_amd64", + ":walker_check_arm64", ], library = ":pagetables", deps = ["//pkg/usermem"], diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go index bc16a1622..7605d0cb2 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go @@ -58,6 +58,15 @@ type PageTables struct { readOnlyShared bool } +// Init initializes a set of PageTables. +// +//go:nosplit +func (p *PageTables) Init(allocator Allocator) { + p.Allocator = allocator + p.root = p.Allocator.NewPTEs() + p.rootPhysical = p.Allocator.PhysicalFor(p.root) +} + // NewWithUpper returns new PageTables. 
// // upperSharedPageTables are used for mapping the upper of addresses, @@ -73,14 +82,17 @@ type PageTables struct { func NewWithUpper(a Allocator, upperSharedPageTables *PageTables, upperStart uintptr) *PageTables { p := new(PageTables) p.Init(a) + if upperSharedPageTables != nil { if !upperSharedPageTables.readOnlyShared { panic("Only read-only shared pagetables can be used as upper") } p.upperSharedPageTables = upperSharedPageTables p.upperStart = upperStart - p.cloneUpperShared() } + + p.InitArch(a) + return p } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go index a4e416af7..520161755 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go @@ -24,6 +24,14 @@ import ( // archPageTables is architecture-specific data. type archPageTables struct { + // root is the pagetable root for kernel space. + root *PTEs + + // rootPhysical is the cached physical address of the root. + // + // This is saved only to prevent constant translation. + rootPhysical uintptr + asid uint16 } @@ -38,7 +46,7 @@ func (p *PageTables) TTBR0_EL1(noFlush bool, asid uint16) uint64 { // //go:nosplit func (p *PageTables) TTBR1_EL1(noFlush bool, asid uint16) uint64 { - return uint64(p.upperSharedPageTables.rootPhysical) | (uint64(asid)&ttbrASIDMask)<<ttbrASIDOffset + return uint64(p.archPageTables.rootPhysical) | (uint64(asid)&ttbrASIDMask)<<ttbrASIDOffset } // Bits in page table entries. diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go index e7ab887e5..4bdde8448 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go @@ -41,13 +41,13 @@ const ( entriesPerPage = 512 ) -// Init initializes a set of PageTables. 
+// InitArch does some additional initialization related to the architecture. // //go:nosplit -func (p *PageTables) Init(allocator Allocator) { - p.Allocator = allocator - p.root = p.Allocator.NewPTEs() - p.rootPhysical = p.Allocator.PhysicalFor(p.root) +func (p *PageTables) InitArch(allocator Allocator) { + if p.upperSharedPageTables != nil { + p.cloneUpperShared() + } } func pgdIndex(upperStart uintptr) uintptr { diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go index 5392bf27a..ad0e30c88 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go @@ -42,13 +42,16 @@ const ( entriesPerPage = 512 ) -// Init initializes a set of PageTables. +// InitArch does some additional initialization related to the architecture. // //go:nosplit -func (p *PageTables) Init(allocator Allocator) { - p.Allocator = allocator - p.root = p.Allocator.NewPTEs() - p.rootPhysical = p.Allocator.PhysicalFor(p.root) +func (p *PageTables) InitArch(allocator Allocator) { + if p.upperSharedPageTables != nil { + p.cloneUpperShared() + } else { + p.archPageTables.root = p.Allocator.NewPTEs() + p.archPageTables.rootPhysical = p.Allocator.PhysicalFor(p.archPageTables.root) + } } // cloneUpperShared clone the upper from the upper shared page tables. @@ -59,7 +62,8 @@ func (p *PageTables) cloneUpperShared() { panic("upperStart should be the same as upperBottom") } - // nothing to do for arm. + p.archPageTables.root = p.upperSharedPageTables.archPageTables.root + p.archPageTables.rootPhysical = p.upperSharedPageTables.archPageTables.rootPhysical } // PTEs is a collection of entries. 
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go b/pkg/sentry/platform/ring0/pagetables/walker_arm64.go index 157c9a7cc..c261d393a 100644 --- a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go +++ b/pkg/sentry/platform/ring0/pagetables/walker_arm64.go @@ -116,7 +116,7 @@ func next(start uintptr, size uintptr) uintptr { func (w *Walker) iterateRangeCanonical(start, end uintptr) { pgdEntryIndex := w.pageTables.root if start >= upperBottom { - pgdEntryIndex = w.pageTables.upperSharedPageTables.root + pgdEntryIndex = w.pageTables.archPageTables.root } for pgdIndex := (uint16((start & pgdMask) >> pgdShift)); start < end && pgdIndex < entriesPerPage; pgdIndex++ { |