diff options
32 files changed, 528 insertions, 421 deletions
diff --git a/pkg/ring0/BUILD b/pkg/ring0/BUILD index d1b14efdb..885958456 100644 --- a/pkg/ring0/BUILD +++ b/pkg/ring0/BUILD @@ -80,6 +80,7 @@ go_library( "//pkg/ring0/pagetables", "//pkg/safecopy", "//pkg/sentry/arch", + "//pkg/sentry/arch/fpu", "//pkg/usermem", ], ) diff --git a/pkg/ring0/defs.go b/pkg/ring0/defs.go index e2561e4c2..b6e2012e8 100644 --- a/pkg/ring0/defs.go +++ b/pkg/ring0/defs.go @@ -17,6 +17,7 @@ package ring0 import ( "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" ) // Kernel is a global kernel object. @@ -96,7 +97,7 @@ type SwitchOpts struct { // FloatingPointState is a byte pointer where floating point state is // saved and restored. - FloatingPointState *byte + FloatingPointState *fpu.State // PageTables are the application page tables. PageTables *pagetables.PageTables diff --git a/pkg/ring0/gen_offsets/BUILD b/pkg/ring0/gen_offsets/BUILD index 15b93d61c..f421e1687 100644 --- a/pkg/ring0/gen_offsets/BUILD +++ b/pkg/ring0/gen_offsets/BUILD @@ -35,6 +35,7 @@ go_binary( "//pkg/cpuid", "//pkg/ring0/pagetables", "//pkg/sentry/arch", + "//pkg/sentry/arch/fpu", "//pkg/usermem", ], ) diff --git a/pkg/ring0/kernel_amd64.go b/pkg/ring0/kernel_amd64.go index 36a60700e..33c259757 100644 --- a/pkg/ring0/kernel_amd64.go +++ b/pkg/ring0/kernel_amd64.go @@ -239,17 +239,17 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { regs.Ss = uint64(Udata) // Ditto. // Perform the switch. - swapgs() // GS will be swapped on return. - WriteFS(uintptr(regs.Fs_base)) // escapes: no. Set application FS. - WriteGS(uintptr(regs.Gs_base)) // escapes: no. Set application GS. - LoadFloatingPoint(switchOpts.FloatingPointState) // escapes: no. Copy in floating point. + swapgs() // GS will be swapped on return. + WriteFS(uintptr(regs.Fs_base)) // escapes: no. Set application FS. + WriteGS(uintptr(regs.Gs_base)) // escapes: no. Set application GS. 
+ LoadFloatingPoint(switchOpts.FloatingPointState.BytePointer()) // escapes: no. Copy in floating point. if switchOpts.FullRestore { vector = iret(c, regs, uintptr(userCR3)) } else { vector = sysret(c, regs, uintptr(userCR3)) } - SaveFloatingPoint(switchOpts.FloatingPointState) // escapes: no. Copy out floating point. - WriteFS(uintptr(c.registers.Fs_base)) // escapes: no. Restore kernel FS. + SaveFloatingPoint(switchOpts.FloatingPointState.BytePointer()) // escapes: no. Copy out floating point. + WriteFS(uintptr(c.registers.Fs_base)) // escapes: no. Restore kernel FS. return } diff --git a/pkg/ring0/kernel_arm64.go b/pkg/ring0/kernel_arm64.go index 41909b3a0..7975e5f92 100644 --- a/pkg/ring0/kernel_arm64.go +++ b/pkg/ring0/kernel_arm64.go @@ -62,7 +62,7 @@ func IsCanonical(addr uint64) bool { //go:nosplit func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { storeAppASID(uintptr(switchOpts.UserASID)) - storeEl0Fpstate(switchOpts.FloatingPointState) + storeEl0Fpstate(switchOpts.FloatingPointState.BytePointer()) if switchOpts.Flush { FlushTlbByASID(uintptr(switchOpts.UserASID)) @@ -82,7 +82,7 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { fpDisableTrap = CPACREL1() if fpDisableTrap != 0 { - SaveFloatingPoint(switchOpts.FloatingPointState) + SaveFloatingPoint(switchOpts.FloatingPointState.BytePointer()) } vector = c.vecCode diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index 85278b389..f660f1614 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -9,7 +9,6 @@ go_library( "arch.go", "arch_aarch64.go", "arch_amd64.go", - "arch_amd64.s", "arch_arm64.go", "arch_state_x86.go", "arch_x86.go", @@ -36,8 +35,8 @@ go_library( "//pkg/log", "//pkg/marshal", "//pkg/marshal/primitive", + "//pkg/sentry/arch/fpu", "//pkg/sentry/limits", - "//pkg/sync", "//pkg/syserror", "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", diff --git a/pkg/sentry/arch/arch.go b/pkg/sentry/arch/arch.go index 
dd2effdf9..921151137 100644 --- a/pkg/sentry/arch/arch.go +++ b/pkg/sentry/arch/arch.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/marshal" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/usermem" ) @@ -50,12 +51,6 @@ func (a Arch) String() string { } } -// FloatingPointData is a generic type, and will always be passed as a pointer. -// We rely on the individual arch implementations to meet all the necessary -// requirements. For example, on x86 the region must be 16-byte aligned and 512 -// bytes in size. -type FloatingPointData byte - // Context provides architecture-dependent information for a specific thread. // // NOTE(b/34169503): Currently we use uintptr here to refer to a generic native @@ -187,7 +182,7 @@ type Context interface { ClearSingleStep() // FloatingPointData will be passed to underlying save routines. - FloatingPointData() *FloatingPointData + FloatingPointData() *fpu.State // NewMmapLayout returns a layout for a new MM, where MinAddr for the // returned layout must be no lower than min, and MaxAddr for the returned @@ -221,16 +216,6 @@ type Context interface { // number of bytes read. PtraceSetRegs(src io.Reader) (int, error) - // PtraceGetFPRegs implements ptrace(PTRACE_GETFPREGS) by writing the - // floating-point registers represented by this Context to addr in dst and - // returning the number of bytes written. - PtraceGetFPRegs(dst io.Writer) (int, error) - - // PtraceSetFPRegs implements ptrace(PTRACE_SETFPREGS) by reading - // floating-point registers from src into this Context and returning the - // number of bytes read. 
- PtraceSetFPRegs(src io.Reader) (int, error) - // PtraceGetRegSet implements ptrace(PTRACE_GETREGSET) by writing the // register set given by architecture-defined value regset from this // Context to dst and returning the number of bytes written, which must be @@ -365,18 +350,3 @@ func (a SyscallArgument) SizeT() uint { func (a SyscallArgument) ModeT() uint { return uint(uint16(a.Value)) } - -// ErrFloatingPoint indicates a failed restore due to unusable floating point -// state. -type ErrFloatingPoint struct { - // supported is the supported floating point state. - supported uint64 - - // saved is the saved floating point state. - saved uint64 -} - -// Error returns a sensible description of the restore error. -func (e ErrFloatingPoint) Error() string { - return fmt.Sprintf("floating point state contains unsupported features; supported: %#x saved: %#x", e.supported, e.saved) -} diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go index fd73751e7..08789f517 100644 --- a/pkg/sentry/arch/arch_aarch64.go +++ b/pkg/sentry/arch/arch_aarch64.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" "gvisor.dev/gvisor/pkg/syserror" ) @@ -40,65 +41,11 @@ type Registers struct { const ( // SyscallWidth is the width of insturctions. SyscallWidth = 4 - - // fpsimdMagic is the magic number which is used in fpsimd_context. - fpsimdMagic = 0x46508001 - - // fpsimdContextSize is the size of fpsimd_context. - fpsimdContextSize = 0x210 ) // ARMTrapFlag is the mask for the trap flag. const ARMTrapFlag = uint64(1) << 21 -// aarch64FPState is aarch64 floating point state. -type aarch64FPState []byte - -// initAarch64FPState sets up initial state. -// -// Related code in Linux kernel: fpsimd_flush_thread(). -// FPCR = FPCR_RM_RN (0x0 << 22). 
-// -// Currently, aarch64FPState is only a space of 0x210 length for fpstate. -// The fp head is useless in sentry/ptrace/kvm. -// -func initAarch64FPState(data aarch64FPState) { -} - -func newAarch64FPStateSlice() []byte { - return alignedBytes(4096, 16)[:fpsimdContextSize] -} - -// newAarch64FPState returns an initialized floating point state. -// -// The returned state is large enough to store all floating point state -// supported by host, even if the app won't use much of it due to a restricted -// FeatureSet. -func newAarch64FPState() aarch64FPState { - f := aarch64FPState(newAarch64FPStateSlice()) - initAarch64FPState(f) - return f -} - -// fork creates and returns an identical copy of the aarch64 floating point state. -func (f aarch64FPState) fork() aarch64FPState { - n := aarch64FPState(newAarch64FPStateSlice()) - copy(n, f) - return n -} - -// FloatingPointData returns the raw data pointer. -func (f aarch64FPState) FloatingPointData() *FloatingPointData { - return (*FloatingPointData)(&f[0]) -} - -// NewFloatingPointData returns a new floating point data blob. -// -// This is primarily for use in tests. -func NewFloatingPointData() *FloatingPointData { - return (*FloatingPointData)(&(newAarch64FPState()[0])) -} - // State contains the common architecture bits for aarch64 (the build tag of this // file ensures it's only built on aarch64). // @@ -108,7 +55,7 @@ type State struct { Regs Registers // Our floating point state. - aarch64FPState `state:"wait"` + fpState fpu.State `state:"wait"` // FeatureSet is a pointer to the currently active feature set. FeatureSet *cpuid.FeatureSet @@ -162,10 +109,10 @@ func (s State) Proto() *rpb.Registers { // Fork creates and returns an identical copy of the state. 
func (s *State) Fork() State { return State{ - Regs: s.Regs, - aarch64FPState: s.aarch64FPState.fork(), - FeatureSet: s.FeatureSet, - OrigR0: s.OrigR0, + Regs: s.Regs, + fpState: s.fpState.Fork(), + FeatureSet: s.FeatureSet, + OrigR0: s.OrigR0, } } @@ -318,10 +265,10 @@ func New(arch Arch, fs *cpuid.FeatureSet) Context { case ARM64: return &context64{ State{ - aarch64FPState: newAarch64FPState(), - FeatureSet: fs, + fpState: fpu.NewState(), + FeatureSet: fs, }, - []aarch64FPState(nil), + []fpu.State(nil), } } panic(fmt.Sprintf("unknown architecture %v", arch)) diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go index 15d8ddb40..2571be60f 100644 --- a/pkg/sentry/arch/arch_amd64.go +++ b/pkg/sentry/arch/arch_amd64.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/usermem" ) @@ -105,7 +106,7 @@ const ( // +stateify savable type context64 struct { State - sigFPState []x86FPState // fpstate to be restored on sigreturn. + sigFPState []fpu.State // fpstate to be restored on sigreturn. } // Arch implements Context.Arch. @@ -113,14 +114,18 @@ func (c *context64) Arch() Arch { return AMD64 } -func (c *context64) copySigFPState() []x86FPState { - var sigfps []x86FPState +func (c *context64) copySigFPState() []fpu.State { + var sigfps []fpu.State for _, s := range c.sigFPState { - sigfps = append(sigfps, s.fork()) + sigfps = append(sigfps, s.Fork()) } return sigfps } +func (c *context64) FloatingPointData() *fpu.State { + return &c.State.fpState +} + // Fork returns an exact copy of this context. 
func (c *context64) Fork() Context { return &context64{ diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go index 0c61a3ff7..14ad9483b 100644 --- a/pkg/sentry/arch/arch_arm64.go +++ b/pkg/sentry/arch/arch_arm64.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/usermem" ) @@ -79,7 +80,7 @@ const ( // +stateify savable type context64 struct { State - sigFPState []aarch64FPState // fpstate to be restored on sigreturn. + sigFPState []fpu.State // fpstate to be restored on sigreturn. } // Arch implements Context.Arch. @@ -87,10 +88,10 @@ func (c *context64) Arch() Arch { return ARM64 } -func (c *context64) copySigFPState() []aarch64FPState { - var sigfps []aarch64FPState +func (c *context64) copySigFPState() []fpu.State { + var sigfps []fpu.State for _, s := range c.sigFPState { - sigfps = append(sigfps, s.fork()) + sigfps = append(sigfps, s.Fork()) } return sigfps } @@ -286,3 +287,7 @@ func (c *context64) PtracePokeUser(addr, data uintptr) error { // TODO(gvisor.dev/issue/1239): Full ptrace supporting for Arm64. return nil } + +func (c *context64) FloatingPointData() *fpu.State { + return &c.State.fpState +} diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go index 840e53d33..b2b94c304 100644 --- a/pkg/sentry/arch/arch_state_x86.go +++ b/pkg/sentry/arch/arch_state_x86.go @@ -16,59 +16,7 @@ package arch -import ( - "gvisor.dev/gvisor/pkg/cpuid" - "gvisor.dev/gvisor/pkg/usermem" -) - -// XSTATE_BV does not exist if FXSAVE is used, but FXSAVE implicitly saves x87 -// and SSE state, so this is the equivalent XSTATE_BV value. -const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE - // afterLoadFPState is invoked by afterLoad. func (s *State) afterLoadFPState() { - old := s.x86FPState - - // Recreate the slice. 
This is done to ensure that it is aligned - // appropriately in memory, and large enough to accommodate any new - // state that may be saved by the new CPU. Even if extraneous new state - // is saved, the state we care about is guaranteed to be a subset of - // new state. Later optimizations can use less space when using a - // smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has - // more info. - s.x86FPState = newX86FPState() - - // x86FPState always contains all the FP state supported by the host. - // We may have come from a newer machine that supports additional state - // which we cannot restore. - // - // The x86 FP state areas are backwards compatible, so we can simply - // truncate the additional floating point state. - // - // Applications should not depend on the truncated state because it - // should relate only to features that were not exposed in the app - // FeatureSet. However, because we do not *prevent* them from using - // this state, we must verify here that there is no in-use state - // (according to XSTATE_BV) which we do not support. - if len(s.x86FPState) < len(old) { - // What do we support? - supportedBV := fxsaveBV - if fs := cpuid.HostFeatureSet(); fs.UseXsave() { - supportedBV = fs.ValidXCR0Mask() - } - - // What was in use? - savedBV := fxsaveBV - if len(old) >= xstateBVOffset+8 { - savedBV = usermem.ByteOrder.Uint64(old[xstateBVOffset:]) - } - - // Supported features must be a superset of saved features. - if savedBV&^supportedBV != 0 { - panic(ErrFloatingPoint{supported: supportedBV, saved: savedBV}) - } - } - - // Copy to the new, aligned location. 
- copy(s.x86FPState, old) + s.fpState.AfterLoad() } diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go index 641ada92f..e8e52d3a8 100644 --- a/pkg/sentry/arch/arch_x86.go +++ b/pkg/sentry/arch/arch_x86.go @@ -24,10 +24,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" - "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // Registers represents the CPU registers for this architecture. @@ -111,57 +110,6 @@ var ( X86TrapFlag uint64 = (1 << 8) ) -// x86FPState is x86 floating point state. -type x86FPState []byte - -// initX86FPState (defined in asm files) sets up initial state. -func initX86FPState(data *FloatingPointData, useXsave bool) - -func newX86FPStateSlice() []byte { - size, align := cpuid.HostFeatureSet().ExtendedStateSize() - capacity := size - // Always use at least 4096 bytes. - // - // For the KVM platform, this state is a fixed 4096 bytes, so make sure - // that the underlying array is at _least_ that size otherwise we will - // corrupt random memory. This is not a pleasant thing to debug. - if capacity < 4096 { - capacity = 4096 - } - return alignedBytes(capacity, align)[:size] -} - -// newX86FPState returns an initialized floating point state. -// -// The returned state is large enough to store all floating point state -// supported by host, even if the app won't use much of it due to a restricted -// FeatureSet. Since they may still be able to see state not advertised by -// CPUID we must ensure it does not contain any sentry state. -func newX86FPState() x86FPState { - f := x86FPState(newX86FPStateSlice()) - initX86FPState(f.FloatingPointData(), cpuid.HostFeatureSet().UseXsave()) - return f -} - -// fork creates and returns an identical copy of the x86 floating point state. 
-func (f x86FPState) fork() x86FPState { - n := x86FPState(newX86FPStateSlice()) - copy(n, f) - return n -} - -// FloatingPointData returns the raw data pointer. -func (f x86FPState) FloatingPointData() *FloatingPointData { - return (*FloatingPointData)(&f[0]) -} - -// NewFloatingPointData returns a new floating point data blob. -// -// This is primarily for use in tests. -func NewFloatingPointData() *FloatingPointData { - return (*FloatingPointData)(&(newX86FPState()[0])) -} - // Proto returns a protobuf representation of the system registers in State. func (s State) Proto() *rpb.Registers { regs := &rpb.AMD64Registers{ @@ -200,7 +148,7 @@ func (s State) Proto() *rpb.Registers { func (s *State) Fork() State { return State{ Regs: s.Regs, - x86FPState: s.x86FPState.fork(), + fpState: s.fpState.Fork(), FeatureSet: s.FeatureSet, } } @@ -393,149 +341,6 @@ func isValidSegmentBase(reg uint64) bool { return reg < uint64(maxAddr64) } -// ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type -// manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently, -// ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area. -const ptraceFPRegsSize = 512 - -// PtraceGetFPRegs implements Context.PtraceGetFPRegs. -func (s *State) PtraceGetFPRegs(dst io.Writer) (int, error) { - return dst.Write(s.x86FPState[:ptraceFPRegsSize]) -} - -// PtraceSetFPRegs implements Context.PtraceSetFPRegs. -func (s *State) PtraceSetFPRegs(src io.Reader) (int, error) { - var f [ptraceFPRegsSize]byte - n, err := io.ReadFull(src, f[:]) - if err != nil { - return 0, err - } - // Force reserved bits in MXCSR to 0. This is consistent with Linux. - sanitizeMXCSR(x86FPState(f[:])) - // N.B. this only copies the beginning of the FP state, which - // corresponds to the FXSAVE area. - copy(s.x86FPState, f[:]) - return n, nil -} - -const ( - // mxcsrOffset is the offset in bytes of the MXCSR field from the start of - // the FXSAVE area. (Intel SDM Vol. 
1, Table 10-2 "Format of an FXSAVE - // Area") - mxcsrOffset = 24 - - // mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the - // start of the FXSAVE area. - mxcsrMaskOffset = 28 -) - -var ( - mxcsrMask uint32 - initMXCSRMask sync.Once -) - -// sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. ("FXRSTOR -// generates a general-protection fault (#GP) in response to an attempt to set -// any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section -// 10.5.1.2 "SSE State") -func sanitizeMXCSR(f x86FPState) { - mxcsr := usermem.ByteOrder.Uint32(f[mxcsrOffset:]) - initMXCSRMask.Do(func() { - temp := x86FPState(alignedBytes(uint(ptraceFPRegsSize), 16)) - initX86FPState(temp.FloatingPointData(), false /* useXsave */) - mxcsrMask = usermem.ByteOrder.Uint32(temp[mxcsrMaskOffset:]) - if mxcsrMask == 0 { - // "If the value of the MXCSR_MASK field is 00000000H, then the - // MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM - // Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR - // Register" - mxcsrMask = 0xffbf - } - }) - mxcsr &= mxcsrMask - usermem.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr) -} - -const ( - // minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal - // to the size of the XSAVE legacy area (512 bytes) plus the size of the - // XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's - // X86_XSTATE_SSE_SIZE. - minXstateBytes = 512 + 64 - - // userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD - // field in Linux's struct user_xstateregs, which is the type manipulated - // by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently, - // userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET. - userXstateXCR0Offset = 464 - - // xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86 - // XSAVE area. 
- xstateBVOffset = 512 - - // xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the - // XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is - // a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE - // header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header". - // Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP - // exceptions resulting from invalid values; we aren't. Linux also never - // uses the compacted format when doing XSAVE and doesn't even define the - // compaction extensions to XSAVE as a CPU feature, so for simplicity we - // assume no one is using them. - xsaveHeaderZeroedOffset = 512 + 8 - xsaveHeaderZeroedBytes = 64 - 8 -) - -func (s *State) ptraceGetXstateRegs(dst io.Writer, maxlen int) (int, error) { - // N.B. s.x86FPState may contain more state than the application - // expects. We only copy the subset that would be in their XSAVE area. - ess, _ := s.FeatureSet.ExtendedStateSize() - f := make([]byte, ess) - copy(f, s.x86FPState) - // "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are - // reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE - // Area". Linux uses the first 8 bytes of this area to store the OS XSTATE - // mask. GDB relies on this: see - // gdb/x86-linux-nat.c:x86_linux_read_description(). - usermem.ByteOrder.PutUint64(f[userXstateXCR0Offset:], s.FeatureSet.ValidXCR0Mask()) - if len(f) > maxlen { - f = f[:maxlen] - } - return dst.Write(f) -} - -func (s *State) ptraceSetXstateRegs(src io.Reader, maxlen int) (int, error) { - // Allow users to pass an xstate register set smaller than ours (they can - // mask bits out of XSTATE_BV), as long as it's at least minXstateBytes. - // Also allow users to pass a register set larger than ours; anything after - // their ExtendedStateSize will be ignored. 
(I think Linux technically - // permits setting a register set smaller than minXstateBytes, but it has - // the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().) - if maxlen < minXstateBytes { - return 0, unix.EFAULT - } - ess, _ := s.FeatureSet.ExtendedStateSize() - if maxlen > int(ess) { - maxlen = int(ess) - } - f := make([]byte, maxlen) - if _, err := io.ReadFull(src, f); err != nil { - return 0, err - } - // Force reserved bits in MXCSR to 0. This is consistent with Linux. - sanitizeMXCSR(x86FPState(f)) - // Users can't enable *more* XCR0 bits than what we, and the CPU, support. - xstateBV := usermem.ByteOrder.Uint64(f[xstateBVOffset:]) - xstateBV &= s.FeatureSet.ValidXCR0Mask() - usermem.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV) - // Force XCOMP_BV and reserved bytes in the XSAVE header to 0. - reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes] - for i := range reserved { - reserved[i] = 0 - } - return copy(s.x86FPState, f), nil -} - // Register sets defined in include/uapi/linux/elf.h. 
const ( _NT_PRSTATUS = 1 @@ -552,12 +357,9 @@ func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, } return s.PtraceGetRegs(dst) case _NT_PRFPREG: - if maxlen < ptraceFPRegsSize { - return 0, syserror.EFAULT - } - return s.PtraceGetFPRegs(dst) + return s.fpState.PtraceGetFPRegs(dst, maxlen) case _NT_X86_XSTATE: - return s.ptraceGetXstateRegs(dst, maxlen) + return s.fpState.PtraceGetXstateRegs(dst, maxlen, s.FeatureSet) default: return 0, syserror.EINVAL } @@ -572,12 +374,9 @@ func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int, } return s.PtraceSetRegs(src) case _NT_PRFPREG: - if maxlen < ptraceFPRegsSize { - return 0, syserror.EFAULT - } - return s.PtraceSetFPRegs(src) + return s.fpState.PtraceSetFPRegs(src, maxlen) case _NT_X86_XSTATE: - return s.ptraceSetXstateRegs(src, maxlen) + return s.fpState.PtraceSetXstateRegs(src, maxlen, s.FeatureSet) default: return 0, syserror.EINVAL } @@ -609,10 +408,10 @@ func New(arch Arch, fs *cpuid.FeatureSet) Context { case AMD64: return &context64{ State{ - x86FPState: newX86FPState(), + fpState: fpu.NewState(), FeatureSet: fs, }, - []x86FPState(nil), + []fpu.State(nil), } } panic(fmt.Sprintf("unknown architecture %v", arch)) diff --git a/pkg/sentry/arch/arch_x86_impl.go b/pkg/sentry/arch/arch_x86_impl.go index 0c73fcbfb..5d7b99bd9 100644 --- a/pkg/sentry/arch/arch_x86_impl.go +++ b/pkg/sentry/arch/arch_x86_impl.go @@ -18,6 +18,7 @@ package arch import ( "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" ) // State contains the common architecture bits for X86 (the build tag of this @@ -29,7 +30,7 @@ type State struct { Regs Registers // Our floating point state. - x86FPState `state:"wait"` + fpState fpu.State `state:"wait"` // FeatureSet is a pointer to the currently active feature set. 
FeatureSet *cpuid.FeatureSet diff --git a/pkg/sentry/arch/fpu/BUILD b/pkg/sentry/arch/fpu/BUILD new file mode 100644 index 000000000..0a5395267 --- /dev/null +++ b/pkg/sentry/arch/fpu/BUILD @@ -0,0 +1,21 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "fpu", + srcs = [ + "fpu.go", + "fpu_amd64.go", + "fpu_amd64.s", + "fpu_arm64.go", + ], + visibility = ["//:sandbox"], + deps = [ + "//pkg/cpuid", + "//pkg/sync", + "//pkg/syserror", + "//pkg/usermem", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/pkg/sentry/arch/fpu/fpu.go b/pkg/sentry/arch/fpu/fpu.go new file mode 100644 index 000000000..867d309a3 --- /dev/null +++ b/pkg/sentry/arch/fpu/fpu.go @@ -0,0 +1,54 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fpu provides basic floating point helpers. +package fpu + +import ( + "fmt" + "reflect" +) + +// State represents floating point state. +// +// This is a simple byte slice, but may have architecture-specific methods +// attached to it. +type State []byte + +// ErrLoadingState indicates a failed restore due to unusable floating point +// state. +type ErrLoadingState struct { + // supportedFeatures is the supported floating point state. + supportedFeatures uint64 + + // savedFeatures is the saved floating point state. + savedFeatures uint64 +} + +// Error returns a sensible description of the restore error. 
+func (e ErrLoadingState) Error() string { + return fmt.Sprintf("floating point state contains unsupported features; supported: %#x saved: %#x", e.supportedFeatures, e.savedFeatures) +} + +// alignedBytes returns a slice of size bytes, aligned in memory to the given +// alignment. This is used because we require certain structures to be aligned +// in a specific way (for example, the X86 floating point data). +func alignedBytes(size, alignment uint) []byte { + data := make([]byte, size+alignment-1) + offset := uint(reflect.ValueOf(data).Index(0).Addr().Pointer() % uintptr(alignment)) + if offset == 0 { + return data[:size:size] + } + return data[alignment-offset:][:size:size] +} diff --git a/pkg/sentry/arch/fpu/fpu_amd64.go b/pkg/sentry/arch/fpu/fpu_amd64.go new file mode 100644 index 000000000..3a62f51be --- /dev/null +++ b/pkg/sentry/arch/fpu/fpu_amd64.go @@ -0,0 +1,280 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 i386 + +package fpu + +import ( + "io" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// initX86FPState (defined in asm files) sets up initial state. +func initX86FPState(data *byte, useXsave bool) + +func newX86FPStateSlice() State { + size, align := cpuid.HostFeatureSet().ExtendedStateSize() + capacity := size + // Always use at least 4096 bytes. 
+ // + // For the KVM platform, this state is a fixed 4096 bytes, so make sure + // that the underlying array is at _least_ that size otherwise we will + // corrupt random memory. This is not a pleasant thing to debug. + if capacity < 4096 { + capacity = 4096 + } + return alignedBytes(capacity, align)[:size] +} + +// NewState returns an initialized floating point state. +// +// The returned state is large enough to store all floating point state +// supported by host, even if the app won't use much of it due to a restricted +// FeatureSet. Since they may still be able to see state not advertised by +// CPUID we must ensure it does not contain any sentry state. +func NewState() State { + f := newX86FPStateSlice() + initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave()) + return f +} + +// Fork creates and returns an identical copy of the x86 floating point state. +func (s *State) Fork() State { + n := newX86FPStateSlice() + copy(n, *s) + return n +} + +// ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type +// manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently, +// ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area. +const ptraceFPRegsSize = 512 + +// PtraceGetFPRegs implements ptrace(PTRACE_GETFPREGS). +func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) { + if maxlen < ptraceFPRegsSize { + return 0, syserror.EFAULT + } + + return dst.Write((*s)[:ptraceFPRegsSize]) +} + +// PtraceSetFPRegs implements ptrace(PTRACE_SETFPREGS). +func (s *State) PtraceSetFPRegs(src io.Reader, maxlen int) (int, error) { + if maxlen < ptraceFPRegsSize { + return 0, syserror.EFAULT + } + + var f [ptraceFPRegsSize]byte + n, err := io.ReadFull(src, f[:]) + if err != nil { + return 0, err + } + // Force reserved bits in MXCSR to 0. This is consistent with Linux. + sanitizeMXCSR(State(f[:])) + // N.B. this only copies the beginning of the FP state, which + // corresponds to the FXSAVE area. 
+ copy(*s, f[:]) + return n, nil +} + +const ( + // mxcsrOffset is the offset in bytes of the MXCSR field from the start of + // the FXSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE + // Area") + mxcsrOffset = 24 + + // mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the + // start of the FXSAVE area. + mxcsrMaskOffset = 28 +) + +var ( + mxcsrMask uint32 + initMXCSRMask sync.Once +) + +const ( + // minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal + // to the size of the XSAVE legacy area (512 bytes) plus the size of the + // XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's + // X86_XSTATE_SSE_SIZE. + minXstateBytes = 512 + 64 + + // userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD + // field in Linux's struct user_xstateregs, which is the type manipulated + // by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently, + // userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET. + userXstateXCR0Offset = 464 + + // xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86 + // XSAVE area. + xstateBVOffset = 512 + + // xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the + // XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is + // a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE + // header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header". + // Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP + // exceptions resulting from invalid values; we aren't. Linux also never + // uses the compacted format when doing XSAVE and doesn't even define the + // compaction extensions to XSAVE as a CPU feature, so for simplicity we + // assume no one is using them. + xsaveHeaderZeroedOffset = 512 + 8 + xsaveHeaderZeroedBytes = 64 - 8 +) + +// sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. 
("FXRSTOR +// generates a general-protection fault (#GP) in response to an attempt to set +// any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section +// 10.5.1.2 "SSE State") +func sanitizeMXCSR(f State) { + mxcsr := usermem.ByteOrder.Uint32(f[mxcsrOffset:]) + initMXCSRMask.Do(func() { + temp := State(alignedBytes(uint(ptraceFPRegsSize), 16)) + initX86FPState(&temp[0], false /* useXsave */) + mxcsrMask = usermem.ByteOrder.Uint32(temp[mxcsrMaskOffset:]) + if mxcsrMask == 0 { + // "If the value of the MXCSR_MASK field is 00000000H, then the + // MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM + // Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR + // Register" + mxcsrMask = 0xffbf + } + }) + mxcsr &= mxcsrMask + usermem.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr) +} + +// PtraceGetXstateRegs implements ptrace(PTRACE_GETREGS, NT_X86_XSTATE) by +// writing the floating point registers from this state to dst and returning the +// number of bytes written, which must be less than or equal to maxlen. +func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet *cpuid.FeatureSet) (int, error) { + // N.B. s.x86FPState may contain more state than the application + // expects. We only copy the subset that would be in their XSAVE area. + ess, _ := featureSet.ExtendedStateSize() + f := make([]byte, ess) + copy(f, *s) + // "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are + // reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE + // Area". Linux uses the first 8 bytes of this area to store the OS XSTATE + // mask. GDB relies on this: see + // gdb/x86-linux-nat.c:x86_linux_read_description(). 
+ usermem.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask()) + if len(f) > maxlen { + f = f[:maxlen] + } + return dst.Write(f) +} + +// PtraceSetXstateRegs implements ptrace(PTRACE_SETREGS, NT_X86_XSTATE) by +// reading floating point registers from src and returning the number of bytes +// read, which must be less than or equal to maxlen. +func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet *cpuid.FeatureSet) (int, error) { + // Allow users to pass an xstate register set smaller than ours (they can + // mask bits out of XSTATE_BV), as long as it's at least minXstateBytes. + // Also allow users to pass a register set larger than ours; anything after + // their ExtendedStateSize will be ignored. (I think Linux technically + // permits setting a register set smaller than minXstateBytes, but it has + // the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().) + if maxlen < minXstateBytes { + return 0, unix.EFAULT + } + ess, _ := featureSet.ExtendedStateSize() + if maxlen > int(ess) { + maxlen = int(ess) + } + f := make([]byte, maxlen) + if _, err := io.ReadFull(src, f); err != nil { + return 0, err + } + // Force reserved bits in MXCSR to 0. This is consistent with Linux. + sanitizeMXCSR(State(f)) + // Users can't enable *more* XCR0 bits than what we, and the CPU, support. + xstateBV := usermem.ByteOrder.Uint64(f[xstateBVOffset:]) + xstateBV &= featureSet.ValidXCR0Mask() + usermem.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV) + // Force XCOMP_BV and reserved bytes in the XSAVE header to 0. + reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes] + for i := range reserved { + reserved[i] = 0 + } + return copy(*s, f), nil +} + +// BytePointer returns a pointer to the first byte of the state. 
+// +//go:nosplit +func (s *State) BytePointer() *byte { + return &(*s)[0] +} + +// XSTATE_BV does not exist if FXSAVE is used, but FXSAVE implicitly saves x87 +// and SSE state, so this is the equivalent XSTATE_BV value. +const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE + +// AfterLoad converts the loaded state to the format that compatible with the +// current processor. +func (s *State) AfterLoad() { + old := *s + + // Recreate the slice. This is done to ensure that it is aligned + // appropriately in memory, and large enough to accommodate any new + // state that may be saved by the new CPU. Even if extraneous new state + // is saved, the state we care about is guaranteed to be a subset of + // new state. Later optimizations can use less space when using a + // smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has + // more info. + *s = NewState() + + // x86FPState always contains all the FP state supported by the host. + // We may have come from a newer machine that supports additional state + // which we cannot restore. + // + // The x86 FP state areas are backwards compatible, so we can simply + // truncate the additional floating point state. + // + // Applications should not depend on the truncated state because it + // should relate only to features that were not exposed in the app + // FeatureSet. However, because we do not *prevent* them from using + // this state, we must verify here that there is no in-use state + // (according to XSTATE_BV) which we do not support. + if len(*s) < len(old) { + // What do we support? + supportedBV := fxsaveBV + if fs := cpuid.HostFeatureSet(); fs.UseXsave() { + supportedBV = fs.ValidXCR0Mask() + } + + // What was in use? + savedBV := fxsaveBV + if len(old) >= xstateBVOffset+8 { + savedBV = usermem.ByteOrder.Uint64(old[xstateBVOffset:]) + } + + // Supported features must be a superset of saved features. 
+ if savedBV&^supportedBV != 0 { + panic(ErrLoadingState{supportedFeatures: supportedBV, savedFeatures: savedBV}) + } + } + + // Copy to the new, aligned location. + copy(*s, old) +} diff --git a/pkg/sentry/arch/arch_amd64.s b/pkg/sentry/arch/fpu/fpu_amd64.s index 6c10336e7..6c10336e7 100644 --- a/pkg/sentry/arch/arch_amd64.s +++ b/pkg/sentry/arch/fpu/fpu_amd64.s diff --git a/pkg/sentry/arch/fpu/fpu_arm64.go b/pkg/sentry/arch/fpu/fpu_arm64.go new file mode 100644 index 000000000..d2f62631d --- /dev/null +++ b/pkg/sentry/arch/fpu/fpu_arm64.go @@ -0,0 +1,63 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package fpu + +const ( + // fpsimdMagic is the magic number which is used in fpsimd_context. + fpsimdMagic = 0x46508001 + + // fpsimdContextSize is the size of fpsimd_context. + fpsimdContextSize = 0x210 +) + +// initAarch64FPState sets up initial state. +// +// Related code in Linux kernel: fpsimd_flush_thread(). +// FPCR = FPCR_RM_RN (0x0 << 22). +// +// Currently, aarch64FPState is only a space of 0x210 length for fpstate. +// The fp head is useless in sentry/ptrace/kvm. +// +func initAarch64FPState(data *State) { +} + +func newAarch64FPStateSlice() []byte { + return alignedBytes(4096, 16)[:fpsimdContextSize] +} + +// NewState returns an initialized floating point state. 
+// +// The returned state is large enough to store all floating point state +// supported by host, even if the app won't use much of it due to a restricted +// FeatureSet. +func NewState() State { + f := State(newAarch64FPStateSlice()) + initAarch64FPState(&f) + return f +} + +// Fork creates and returns an identical copy of the aarch64 floating point state. +func (s *State) Fork() State { + n := State(newAarch64FPStateSlice()) + copy(n, *s) + return n +} + +// BytePointer returns a pointer to the first byte of the state. +func (s *State) BytePointer() *byte { + return &(*s)[0] +} diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index e6557cab6..ee3743483 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/marshal/primitive" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/usermem" ) @@ -98,7 +99,7 @@ func (c *context64) NewSignalStack() NativeSignalStack { const _FP_XSTATE_MAGIC2_SIZE = 4 func (c *context64) fpuFrameSize() (size int, useXsave bool) { - size = len(c.x86FPState) + size = len(c.fpState) if size > 512 { // Make room for the magic cookie at the end of the xsave frame. size += _FP_XSTATE_MAGIC2_SIZE @@ -226,10 +227,10 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt c.Regs.Ss = userDS // Save the thread's floating point state. - c.sigFPState = append(c.sigFPState, c.x86FPState) + c.sigFPState = append(c.sigFPState, c.fpState) // Signal handler gets a clean floating point state. - c.x86FPState = newX86FPState() + c.fpState = fpu.NewState() return nil } @@ -273,7 +274,7 @@ func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalSt // Restore floating point state. 
l := len(c.sigFPState) if l > 0 { - c.x86FPState = c.sigFPState[l-1] + c.fpState = c.sigFPState[l-1] // NOTE(cl/133042258): State save requires that any slice // elements from '[len:cap]' to be zero value. c.sigFPState[l-1] = nil diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go index 4491008c2..53281dcba 100644 --- a/pkg/sentry/arch/signal_arm64.go +++ b/pkg/sentry/arch/signal_arm64.go @@ -20,6 +20,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/usermem" ) @@ -139,9 +140,9 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt c.Regs.Regs[30] = uint64(act.Restorer) // Save the thread's floating point state. - c.sigFPState = append(c.sigFPState, c.aarch64FPState) + c.sigFPState = append(c.sigFPState, c.fpState) // Signal handler gets a clean floating point state. - c.aarch64FPState = newAarch64FPState() + c.fpState = fpu.NewState() return nil } @@ -166,7 +167,7 @@ func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalSt // Restore floating point state. l := len(c.sigFPState) if l > 0 { - c.aarch64FPState = c.sigFPState[l-1] + c.fpState = c.sigFPState[l-1] // NOTE(cl/133042258): State save requires that any slice // elements from '[len:cap]' to be zero value. 
c.sigFPState[l-1] = nil diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go index 609ad3941..7aea3dcd8 100644 --- a/pkg/sentry/kernel/ptrace_amd64.go +++ b/pkg/sentry/kernel/ptrace_amd64.go @@ -51,14 +51,15 @@ func (t *Task) ptraceArch(target *Task, req int64, addr, data usermem.Addr) erro return err case linux.PTRACE_GETFPREGS: - _, err := target.Arch().PtraceGetFPRegs(&usermem.IOReadWriter{ + s := target.Arch().FloatingPointData() + _, err := target.Arch().FloatingPointData().PtraceGetFPRegs(&usermem.IOReadWriter{ Ctx: t, IO: t.MemoryManager(), Addr: data, Opts: usermem.IOOpts{ AddressSpaceActive: true, }, - }) + }, len(*s)) return err case linux.PTRACE_SETREGS: @@ -73,14 +74,15 @@ func (t *Task) ptraceArch(target *Task, req int64, addr, data usermem.Addr) erro return err case linux.PTRACE_SETFPREGS: - _, err := target.Arch().PtraceSetFPRegs(&usermem.IOReadWriter{ + s := target.Arch().FloatingPointData() + _, err := s.PtraceSetFPRegs(&usermem.IOReadWriter{ Ctx: t, IO: t.MemoryManager(), Addr: data, Opts: usermem.IOOpts{ AddressSpaceActive: true, }, - }) + }, len(*s)) return err default: diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index 4f9e781af..03a76eb9b 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -50,6 +50,7 @@ go_library( "//pkg/safecopy", "//pkg/seccomp", "//pkg/sentry/arch", + "//pkg/sentry/arch/fpu", "//pkg/sentry/memmap", "//pkg/sentry/platform", "//pkg/sentry/platform/interrupt", @@ -78,6 +79,7 @@ go_test( "//pkg/ring0", "//pkg/ring0/pagetables", "//pkg/sentry/arch", + "//pkg/sentry/arch/fpu", "//pkg/sentry/platform", "//pkg/sentry/platform/kvm/testutil", "//pkg/sentry/time", diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go index f4b9a5321..d761bbdee 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64.go @@ -73,7 +73,7 @@ func (c *vCPU) KernelSyscall() { // We 
only trigger a bluepill entry in the bluepill function, and can // therefore be guaranteed that there is no floating point state to be // loaded on resuming from halt. We only worry about saving on exit. - ring0.SaveFloatingPoint((*byte)(c.floatingPointState)) // escapes: no. + ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no. ring0.Halt() ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no, reload host segment. } @@ -92,7 +92,7 @@ func (c *vCPU) KernelException(vector ring0.Vector) { regs.Rip = 0 } // See above. - ring0.SaveFloatingPoint((*byte)(c.floatingPointState)) // escapes: no. + ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no. ring0.Halt() ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no; reload host segment. } @@ -124,5 +124,5 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) { // Set the context pointer to the saved floating point state. This is // where the guest data has been serialized, the kernel will restore // from this new pointer value. 
- context.Fpstate = uint64(uintptrValue((*byte)(c.floatingPointState))) + context.Fpstate = uint64(uintptrValue(c.floatingPointState.BytePointer())) } diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go index e26b7da8d..578852c3f 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64.go @@ -92,7 +92,7 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) { lazyVfp := c.GetLazyVFP() if lazyVfp != 0 { - fpsimd := fpsimdPtr((*byte)(c.floatingPointState)) + fpsimd := fpsimdPtr(c.floatingPointState.BytePointer()) context.Fpsimd64.Fpsr = fpsimd.Fpsr context.Fpsimd64.Fpcr = fpsimd.Fpcr context.Fpsimd64.Vregs = fpsimd.Vregs @@ -112,12 +112,12 @@ func (c *vCPU) KernelSyscall() { fpDisableTrap := ring0.CPACREL1() if fpDisableTrap != 0 { - fpsimd := fpsimdPtr((*byte)(c.floatingPointState)) + fpsimd := fpsimdPtr(c.floatingPointState.BytePointer()) fpcr := ring0.GetFPCR() fpsr := ring0.GetFPSR() fpsimd.Fpcr = uint32(fpcr) fpsimd.Fpsr = uint32(fpsr) - ring0.SaveVRegs((*byte)(c.floatingPointState)) + ring0.SaveVRegs(c.floatingPointState.BytePointer()) } ring0.Halt() @@ -136,12 +136,12 @@ func (c *vCPU) KernelException(vector ring0.Vector) { fpDisableTrap := ring0.CPACREL1() if fpDisableTrap != 0 { - fpsimd := fpsimdPtr((*byte)(c.floatingPointState)) + fpsimd := fpsimdPtr(c.floatingPointState.BytePointer()) fpcr := ring0.GetFPCR() fpsr := ring0.GetFPSR() fpsimd.Fpcr = uint32(fpcr) fpsimd.Fpsr = uint32(fpsr) - ring0.SaveVRegs((*byte)(c.floatingPointState)) + ring0.SaveVRegs(c.floatingPointState.BytePointer()) } ring0.Halt() diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go index aeae01dbd..706fa53dc 100644 --- a/pkg/sentry/platform/kvm/context.go +++ b/pkg/sentry/platform/kvm/context.go @@ -65,7 +65,7 @@ func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac a // Prepare switch options. 
switchOpts := ring0.SwitchOpts{ Registers: &ac.StateData().Regs, - FloatingPointState: (*byte)(ac.FloatingPointData()), + FloatingPointState: ac.FloatingPointData(), PageTables: localAS.pageTables, Flush: localAS.Touch(cpu), FullRestore: ac.FullRestore(), diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.go b/pkg/sentry/platform/kvm/kvm_amd64_test.go index 76fc594a0..e44e995a0 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64_test.go +++ b/pkg/sentry/platform/kvm/kvm_amd64_test.go @@ -33,7 +33,7 @@ func TestSegments(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, FullRestore: true, }, &si); err == platform.ErrContextInterrupt { diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index 6243b9a04..5bce16dde 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -25,13 +25,14 @@ import ( "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" ktime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/usermem" ) -var dummyFPState = (*byte)(arch.NewFloatingPointData()) +var dummyFPState = fpu.NewState() type testHarness interface { Errorf(format string, args ...interface{}) @@ -159,7 +160,7 @@ func TestApplicationSyscall(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, FullRestore: true, }, &si); err == platform.ErrContextInterrupt { @@ -173,7 +174,7 @@ func TestApplicationSyscall(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + 
FloatingPointState: &dummyFPState, PageTables: pt, }, &si); err == platform.ErrContextInterrupt { return true // Retry. @@ -190,7 +191,7 @@ func TestApplicationFault(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, FullRestore: true, }, &si); err == platform.ErrContextInterrupt { @@ -205,7 +206,7 @@ func TestApplicationFault(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, }, &si); err == platform.ErrContextInterrupt { return true // Retry. @@ -223,7 +224,7 @@ func TestRegistersSyscall(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, }, &si); err == platform.ErrContextInterrupt { continue // Retry. 
@@ -246,7 +247,7 @@ func TestRegistersFault(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, FullRestore: true, }, &si); err == platform.ErrContextInterrupt { @@ -272,7 +273,7 @@ func TestBounce(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, }, &si); err != platform.ErrContextInterrupt { t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) @@ -287,7 +288,7 @@ func TestBounce(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, FullRestore: true, }, &si); err != platform.ErrContextInterrupt { @@ -319,7 +320,7 @@ func TestBounceStress(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, }, &si); err != platform.ErrContextInterrupt { t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) @@ -340,7 +341,7 @@ func TestInvalidate(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, }, &si); err == platform.ErrContextInterrupt { continue // Retry. 
@@ -355,7 +356,7 @@ func TestInvalidate(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, Flush: true, }, &si); err == platform.ErrContextInterrupt { @@ -379,7 +380,7 @@ func TestEmptyAddressSpace(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, }, &si); err == platform.ErrContextInterrupt { return true // Retry. @@ -393,7 +394,7 @@ func TestEmptyAddressSpace(t *testing.T) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, FullRestore: true, }, &si); err == platform.ErrContextInterrupt { @@ -469,7 +470,7 @@ func BenchmarkApplicationSyscall(b *testing.B) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, }, &si); err == platform.ErrContextInterrupt { a++ @@ -506,7 +507,7 @@ func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, - FloatingPointState: dummyFPState, + FloatingPointState: &dummyFPState, PageTables: pt, }, &si); err == platform.ErrContextInterrupt { a++ diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index 6e583baa3..8f2c82e73 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/platform" ktime "gvisor.dev/gvisor/pkg/sentry/time" 
"gvisor.dev/gvisor/pkg/usermem" @@ -70,7 +71,7 @@ type vCPUArchState struct { // floatingPointState is the floating point state buffer used in guest // to host transitions. See usage in bluepill_amd64.go. - floatingPointState *arch.FloatingPointData + floatingPointState fpu.State } const ( @@ -151,7 +152,7 @@ func (c *vCPU) initArchState() error { // This will be saved prior to leaving the guest, and we restore from // this always. We cannot use the pointer in the context alone because // we don't know how large the area there is in reality. - c.floatingPointState = arch.NewFloatingPointData() + c.floatingPointState = fpu.NewState() // Set the time offset to the host native time. return c.setSystemTime() diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go index 7d7857067..2edc9d1b2 100644 --- a/pkg/sentry/platform/kvm/machine_arm64.go +++ b/pkg/sentry/platform/kvm/machine_arm64.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/usermem" ) @@ -32,7 +33,7 @@ type vCPUArchState struct { // floatingPointState is the floating point state buffer used in guest // to host transitions. See usage in bluepill_arm64.go. 
- floatingPointState *arch.FloatingPointData + floatingPointState fpu.State } const ( diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index 059aa43d0..e7d5f3193 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/usermem" ) @@ -150,7 +151,7 @@ func (c *vCPU) initArchState() error { c.PCIDs = pagetables.NewPCIDs(fixedKernelPCID+1, poolPCIDs) } - c.floatingPointState = arch.NewFloatingPointData() + c.floatingPointState = fpu.NewState() return c.setSystemTime() } diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD index fc43cc3c0..47efde6a2 100644 --- a/pkg/sentry/platform/ptrace/BUILD +++ b/pkg/sentry/platform/ptrace/BUILD @@ -30,6 +30,7 @@ go_library( "//pkg/safecopy", "//pkg/seccomp", "//pkg/sentry/arch", + "//pkg/sentry/arch/fpu", "//pkg/sentry/memmap", "//pkg/sentry/platform", "//pkg/sentry/platform/interrupt", diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go index 2c21f946e..01e73b019 100644 --- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go +++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go @@ -20,6 +20,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/usermem" ) @@ -62,9 +63,9 @@ func (t *thread) setRegs(regs *arch.Registers) error { } // getFPRegs gets the floating-point data via the GETREGSET ptrace unix. 
-func (t *thread) getFPRegs(fpState *arch.FloatingPointData, fpLen uint64, useXsave bool) error { +func (t *thread) getFPRegs(fpState *fpu.State, fpLen uint64, useXsave bool) error { iovec := unix.Iovec{ - Base: (*byte)(fpState), + Base: fpState.BytePointer(), Len: fpLen, } _, _, errno := unix.RawSyscall6( @@ -81,9 +82,9 @@ func (t *thread) getFPRegs(fpState *arch.FloatingPointData, fpLen uint64, useXsa } // setFPRegs sets the floating-point data via the SETREGSET ptrace unix. -func (t *thread) setFPRegs(fpState *arch.FloatingPointData, fpLen uint64, useXsave bool) error { +func (t *thread) setFPRegs(fpState *fpu.State, fpLen uint64, useXsave bool) error { iovec := unix.Iovec{ - Base: (*byte)(fpState), + Base: fpState.BytePointer(), Len: fpLen, } _, _, errno := unix.RawSyscall6( |