diff options
-rw-r--r-- | pkg/sentry/platform/kvm/machine_amd64.go | 52 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/entry_amd64.s | 10 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/kernel_amd64.go | 21 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/offsets_amd64.go | 3 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/x86.go | 15 |
5 files changed, 70 insertions, 31 deletions
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index 7fcb7451f..7ac289756 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -150,13 +150,20 @@ func (c *vCPU) fault(signal int32) (*arch.SignalInfo, usermem.AccessType, error) // the code provided here. We need to re-execute. return nil, usermem.NoAccess, platform.ErrContextInterrupt } - info := &arch.SignalInfo{Signo: signal} + info := &arch.SignalInfo{ + Signo: signal, + } info.SetAddr(uint64(faultAddr)) accessType := usermem.AccessType{ Read: code&(1<<1) == 0, Write: code&(1<<1) != 0, Execute: code&(1<<4) != 0, } + if !accessType.Write && !accessType.Execute { + info.Code = 1 // SEGV_MAPERR. + } else { + info.Code = 2 // SEGV_ACCERR. + } return info, accessType, platform.ErrContextSignal } @@ -191,30 +198,55 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts) (*arch.SignalInfo, user return c.fault(int32(syscall.SIGSEGV)) case ring0.Debug, ring0.Breakpoint: - info := &arch.SignalInfo{Signo: int32(syscall.SIGTRAP)} + info := &arch.SignalInfo{ + Signo: int32(syscall.SIGTRAP), + Code: 1, // TRAP_BRKPT (breakpoint). + } + info.SetAddr(switchOpts.Registers.Rip) // Include address. return info, usermem.AccessType{}, platform.ErrContextSignal case ring0.GeneralProtectionFault: - if !ring0.IsCanonical(switchOpts.Registers.Rip) { - // If the RIP is non-canonical, it's a SEGV. - info := &arch.SignalInfo{Signo: int32(syscall.SIGSEGV)} - return info, usermem.AccessType{}, platform.ErrContextSignal + info := &arch.SignalInfo{ + Signo: int32(syscall.SIGSEGV), + Code: arch.SignalInfoKernel, } - // Otherwise, we deliver a SIGBUS. - info := &arch.SignalInfo{Signo: int32(syscall.SIGBUS)} + info.SetAddr(switchOpts.Registers.Rip) // Include address. return info, usermem.AccessType{}, platform.ErrContextSignal case ring0.InvalidOpcode: - info := &arch.SignalInfo{Signo: int32(syscall.SIGILL)} + info := &arch.SignalInfo{ + Signo: int32(syscall.SIGILL), + Code: 1, // ILL_ILLOPC (illegal opcode). + } + info.SetAddr(switchOpts.Registers.Rip) // Include address. + return info, usermem.AccessType{}, platform.ErrContextSignal + + case ring0.DivideByZero: + info := &arch.SignalInfo{ + Signo: int32(syscall.SIGFPE), + Code: 1, // FPE_INTDIV (divide by zero). + } + info.SetAddr(switchOpts.Registers.Rip) // Include address. return info, usermem.AccessType{}, platform.ErrContextSignal case ring0.X87FloatingPointException: - info := &arch.SignalInfo{Signo: int32(syscall.SIGFPE)} + info := &arch.SignalInfo{ + Signo: int32(syscall.SIGFPE), + Code: 7, // FPE_FLTINV (invalid operation). + } + info.SetAddr(switchOpts.Registers.Rip) // Include address. return info, usermem.AccessType{}, platform.ErrContextSignal case ring0.Vector(bounce): return nil, usermem.NoAccess, platform.ErrContextInterrupt + case ring0.AlignmentCheck: + info := &arch.SignalInfo{ + Signo: int32(syscall.SIGBUS), + Code: 2, // BUS_ADRERR (physical address does not exist). + } + return info, usermem.NoAccess, platform.ErrContextSignal + case ring0.NMI: // An NMI is generated only when a fault is not servicable by // KVM itself, so we think some mapping is writeable but it's diff --git a/pkg/sentry/platform/ring0/entry_amd64.s b/pkg/sentry/platform/ring0/entry_amd64.s index e8638133b..08c15ad65 100644 --- a/pkg/sentry/platform/ring0/entry_amd64.s +++ b/pkg/sentry/platform/ring0/entry_amd64.s @@ -248,10 +248,12 @@ TEXT ·exception(SB),NOSPLIT,$0 user: SWAP_GS() - XCHGQ CPU_REGISTERS+PTRACE_RAX(GS), AX // Swap for AX (regs). - REGISTERS_SAVE(AX, 0) // Save all except IP, FLAGS, SP, AX. - MOVQ CPU_REGISTERS+PTRACE_RAX(GS), BX // Load saved AX value. - MOVQ BX, PTRACE_RAX(AX) // Save everything else. + ADDQ $-8, SP // Adjust for flags. + MOVQ $_KERNEL_FLAGS, 0(SP); BYTE $0x9d; // Reset flags (POPFQ). + XCHGQ CPU_REGISTERS+PTRACE_RAX(GS), AX // Swap for user regs. + REGISTERS_SAVE(AX, 0) // Save all except IP, FLAGS, SP, AX. + MOVQ CPU_REGISTERS+PTRACE_RAX(GS), BX // Restore original AX. + MOVQ BX, PTRACE_RAX(AX) // Save it. MOVQ BX, PTRACE_ORIGRAX(AX) MOVQ 16(SP), BX; MOVQ BX, PTRACE_RIP(AX) MOVQ 24(SP), CX; MOVQ CX, PTRACE_CS(AX) diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go index 37d5484e1..c828af654 100644 --- a/pkg/sentry/platform/ring0/kernel_amd64.go +++ b/pkg/sentry/platform/ring0/kernel_amd64.go @@ -20,20 +20,6 @@ import ( "encoding/binary" ) -const ( - // KernelFlagsSet should always be set in the kernel. - KernelFlagsSet = _RFLAGS_RESERVED - - // UserFlagsSet are always set in userspace. - UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF - - // KernelFlagsClear should always be clear in the kernel. - KernelFlagsClear = _RFLAGS_IF | _RFLAGS_NT | _RFLAGS_IOPL - - // UserFlagsClear are always cleared in userspace. - UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL -) - // init initializes architecture-specific state. func (k *Kernel) init(opts KernelOpts) { // Save the root page tables. @@ -85,6 +71,9 @@ func (c *CPU) init() { c.registers.Ss = uint64(Kdata) c.registers.Fs = uint64(Kdata) c.registers.Gs = uint64(Kdata) + + // Set mandatory flags. + c.registers.Eflags = KernelFlagsSet } // StackTop returns the kernel's stack address. @@ -119,7 +108,7 @@ func (c *CPU) TSS() (uint64, uint16, *SegmentDescriptor) { // //go:nosplit func (c *CPU) CR0() uint64 { - return _CR0_PE | _CR0_PG | _CR0_ET + return _CR0_PE | _CR0_PG | _CR0_AM | _CR0_ET } // CR4 returns the CPU's CR4 value. @@ -240,7 +229,7 @@ func start(c *CPU) { // Set the syscall target. wrmsr(_MSR_LSTAR, kernelFunc(sysenter)) - wrmsr(_MSR_SYSCALL_MASK, _RFLAGS_STEP|_RFLAGS_IF|_RFLAGS_DF|_RFLAGS_IOPL|_RFLAGS_AC|_RFLAGS_NT) + wrmsr(_MSR_SYSCALL_MASK, KernelFlagsClear|_RFLAGS_DF) // NOTE: This depends on having the 64-bit segments immediately // following the 32-bit user segments. This is simply the way the diff --git a/pkg/sentry/platform/ring0/offsets_amd64.go b/pkg/sentry/platform/ring0/offsets_amd64.go index 9acd442ba..ca5fd456b 100644 --- a/pkg/sentry/platform/ring0/offsets_amd64.go +++ b/pkg/sentry/platform/ring0/offsets_amd64.go @@ -38,7 +38,8 @@ func Emit(w io.Writer) { fmt.Fprintf(w, "#define CPU_KERNEL_SYSCALL 0x%02x\n", reflect.ValueOf(&c.KernelSyscall).Pointer()-reflect.ValueOf(c).Pointer()) fmt.Fprintf(w, "\n// Bits.\n") - fmt.Fprintf(w, "#define _RFLAGS_IF 0x%02x\n", _RFLAGS_IF) + fmt.Fprintf(w, "#define _RFLAGS_IF 0x%02x\n", _RFLAGS_IF) + fmt.Fprintf(w, "#define _KERNEL_FLAGS 0x%02x\n", KernelFlagsSet) fmt.Fprintf(w, "\n// Vectors.\n") fmt.Fprintf(w, "#define DivideByZero 0x%02x\n", DivideByZero) diff --git a/pkg/sentry/platform/ring0/x86.go b/pkg/sentry/platform/ring0/x86.go index 74b140066..3d437a77c 100644 --- a/pkg/sentry/platform/ring0/x86.go +++ b/pkg/sentry/platform/ring0/x86.go @@ -24,6 +24,7 @@ import ( const ( _CR0_PE = 1 << 0 _CR0_ET = 1 << 4 + _CR0_AM = 1 << 18 _CR0_PG = 1 << 31 _CR4_PSE = 1 << 4 @@ -55,6 +56,20 @@ const ( _MSR_SYSCALL_MASK = 0xc0000084 ) +const ( + // KernelFlagsSet should always be set in the kernel. + KernelFlagsSet = _RFLAGS_RESERVED + + // UserFlagsSet are always set in userspace. + UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF + + // KernelFlagsClear should always be clear in the kernel. + KernelFlagsClear = _RFLAGS_STEP | _RFLAGS_IF | _RFLAGS_IOPL | _RFLAGS_AC | _RFLAGS_NT + + // UserFlagsClear are always cleared in userspace. + UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL +) + // Vector is an exception vector. type Vector uintptr |