| author    | Andrei Vagin <avagin@google.com>                              | 2021-03-23 18:44:38 -0700 |
| committer | gVisor bot <gvisor-bot@google.com>                            | 2021-03-23 18:46:37 -0700 |
| commit    | 56a9a13976ad800a8a34b194d35f0169d0a0bb23 (patch)              |                           |
| tree      | cb4b7c4352dc90a8c4c4f469c788fd2c5c6fd0dd /pkg/sentry/arch/fpu |                           |
| parent    | 960155cdaad49ccea07e45152f124beeb7e7fdcc (diff)               |                           |
Move the code that manages floating-point state to a separate package
This change is inspired by Adin's cl/355256448.
PiperOrigin-RevId: 364695931
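
For orientation, here is a minimal sketch of how a caller might use the relocated API, based only on the functions introduced in this diff (`NewState`, `Fork`, `PtraceGetFPRegs`); the import path `gvisor.dev/gvisor/pkg/sentry/arch/fpu` and the standalone `main` wrapper are assumptions for illustration, not part of the commit:

```go
package main

import (
	"bytes"
	"fmt"

	// Assumed import path, inferred from the new pkg/sentry/arch/fpu directory.
	"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
)

func main() {
	// Allocate a host-sized, aligned, initialized floating-point state area.
	state := fpu.NewState()

	// Fork copies the state, as would happen when cloning a task.
	child := state.Fork()

	// On amd64, the first 512 bytes form the FXSAVE area and can be
	// exported through the ptrace helper.
	var buf bytes.Buffer
	if n, err := state.PtraceGetFPRegs(&buf, 512); err == nil {
		fmt.Printf("exported %d bytes; child state is %d bytes\n", n, len(child))
	}
}
```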
Diffstat (limited to 'pkg/sentry/arch/fpu')
| -rw-r--r-- | pkg/sentry/arch/fpu/BUILD        |  21 |
| -rw-r--r-- | pkg/sentry/arch/fpu/fpu.go       |  54 |
| -rw-r--r-- | pkg/sentry/arch/fpu/fpu_amd64.go | 280 |
| -rw-r--r-- | pkg/sentry/arch/fpu/fpu_amd64.s  | 136 |
| -rw-r--r-- | pkg/sentry/arch/fpu/fpu_arm64.go |  63 |
5 files changed, 554 insertions, 0 deletions
```diff
diff --git a/pkg/sentry/arch/fpu/BUILD b/pkg/sentry/arch/fpu/BUILD
new file mode 100644
index 000000000..0a5395267
--- /dev/null
+++ b/pkg/sentry/arch/fpu/BUILD
@@ -0,0 +1,21 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "fpu",
+    srcs = [
+        "fpu.go",
+        "fpu_amd64.go",
+        "fpu_amd64.s",
+        "fpu_arm64.go",
+    ],
+    visibility = ["//:sandbox"],
+    deps = [
+        "//pkg/cpuid",
+        "//pkg/sync",
+        "//pkg/syserror",
+        "//pkg/usermem",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
diff --git a/pkg/sentry/arch/fpu/fpu.go b/pkg/sentry/arch/fpu/fpu.go
new file mode 100644
index 000000000..867d309a3
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu.go
@@ -0,0 +1,54 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fpu provides basic floating point helpers.
+package fpu
+
+import (
+	"fmt"
+	"reflect"
+)
+
+// State represents floating point state.
+//
+// This is a simple byte slice, but may have architecture-specific methods
+// attached to it.
+type State []byte
+
+// ErrLoadingState indicates a failed restore due to unusable floating point
+// state.
+type ErrLoadingState struct {
+	// supported is the supported floating point state.
+	supportedFeatures uint64
+
+	// saved is the saved floating point state.
+	savedFeatures uint64
+}
+
+// Error returns a sensible description of the restore error.
+func (e ErrLoadingState) Error() string {
+	return fmt.Sprintf("floating point state contains unsupported features; supported: %#x saved: %#x", e.supportedFeatures, e.savedFeatures)
+}
+
+// alignedBytes returns a slice of size bytes, aligned in memory to the given
+// alignment. This is used because we require certain structures to be aligned
+// in a specific way (for example, the X86 floating point data).
+func alignedBytes(size, alignment uint) []byte {
+	data := make([]byte, size+alignment-1)
+	offset := uint(reflect.ValueOf(data).Index(0).Addr().Pointer() % uintptr(alignment))
+	if offset == 0 {
+		return data[:size:size]
+	}
+	return data[alignment-offset:][:size:size]
+}
diff --git a/pkg/sentry/arch/fpu/fpu_amd64.go b/pkg/sentry/arch/fpu/fpu_amd64.go
new file mode 100644
index 000000000..3a62f51be
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu_amd64.go
@@ -0,0 +1,280 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64 i386
+
+package fpu
+
+import (
+	"io"
+
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/cpuid"
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/usermem"
+)
+
+// initX86FPState (defined in asm files) sets up initial state.
+func initX86FPState(data *byte, useXsave bool)
+
+func newX86FPStateSlice() State {
+	size, align := cpuid.HostFeatureSet().ExtendedStateSize()
+	capacity := size
+	// Always use at least 4096 bytes.
+	//
+	// For the KVM platform, this state is a fixed 4096 bytes, so make sure
+	// that the underlying array is at _least_ that size otherwise we will
+	// corrupt random memory. This is not a pleasant thing to debug.
+	if capacity < 4096 {
+		capacity = 4096
+	}
+	return alignedBytes(capacity, align)[:size]
+}
+
+// NewState returns an initialized floating point state.
+//
+// The returned state is large enough to store all floating point state
+// supported by host, even if the app won't use much of it due to a restricted
+// FeatureSet. Since they may still be able to see state not advertised by
+// CPUID we must ensure it does not contain any sentry state.
+func NewState() State {
+	f := newX86FPStateSlice()
+	initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave())
+	return f
+}
+
+// Fork creates and returns an identical copy of the x86 floating point state.
+func (s *State) Fork() State {
+	n := newX86FPStateSlice()
+	copy(n, *s)
+	return n
+}
+
+// ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type
+// manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently,
+// ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area.
+const ptraceFPRegsSize = 512
+
+// PtraceGetFPRegs implements Context.PtraceGetFPRegs.
+func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) {
+	if maxlen < ptraceFPRegsSize {
+		return 0, syserror.EFAULT
+	}
+
+	return dst.Write((*s)[:ptraceFPRegsSize])
+}
+
+// PtraceSetFPRegs implements Context.PtraceSetFPRegs.
+func (s *State) PtraceSetFPRegs(src io.Reader, maxlen int) (int, error) {
+	if maxlen < ptraceFPRegsSize {
+		return 0, syserror.EFAULT
+	}
+
+	var f [ptraceFPRegsSize]byte
+	n, err := io.ReadFull(src, f[:])
+	if err != nil {
+		return 0, err
+	}
+	// Force reserved bits in MXCSR to 0. This is consistent with Linux.
+	sanitizeMXCSR(State(f[:]))
+	// N.B. this only copies the beginning of the FP state, which
+	// corresponds to the FXSAVE area.
+	copy(*s, f[:])
+	return n, nil
+}
+
+const (
+	// mxcsrOffset is the offset in bytes of the MXCSR field from the start of
+	// the FXSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE
+	// Area")
+	mxcsrOffset = 24
+
+	// mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the
+	// start of the FXSAVE area.
+	mxcsrMaskOffset = 28
+)
+
+var (
+	mxcsrMask     uint32
+	initMXCSRMask sync.Once
+)
+
+const (
+	// minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal
+	// to the size of the XSAVE legacy area (512 bytes) plus the size of the
+	// XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's
+	// X86_XSTATE_SSE_SIZE.
+	minXstateBytes = 512 + 64
+
+	// userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD
+	// field in Linux's struct user_xstateregs, which is the type manipulated
+	// by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently,
+	// userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET.
+	userXstateXCR0Offset = 464
+
+	// xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86
+	// XSAVE area.
+	xstateBVOffset = 512
+
+	// xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the
+	// XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is
+	// a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE
+	// header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header".
+	// Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP
+	// exceptions resulting from invalid values; we aren't. Linux also never
+	// uses the compacted format when doing XSAVE and doesn't even define the
+	// compaction extensions to XSAVE as a CPU feature, so for simplicity we
+	// assume no one is using them.
+	xsaveHeaderZeroedOffset = 512 + 8
+	xsaveHeaderZeroedBytes  = 64 - 8
+)
+
+// sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. ("FXRSTOR
+// generates a general-protection fault (#GP) in response to an attempt to set
+// any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section
+// 10.5.1.2 "SSE State")
+func sanitizeMXCSR(f State) {
+	mxcsr := usermem.ByteOrder.Uint32(f[mxcsrOffset:])
+	initMXCSRMask.Do(func() {
+		temp := State(alignedBytes(uint(ptraceFPRegsSize), 16))
+		initX86FPState(&temp[0], false /* useXsave */)
+		mxcsrMask = usermem.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
+		if mxcsrMask == 0 {
+			// "If the value of the MXCSR_MASK field is 00000000H, then the
+			// MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM
+			// Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR
+			// Register"
+			mxcsrMask = 0xffbf
+		}
+	})
+	mxcsr &= mxcsrMask
+	usermem.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr)
+}
+
+// PtraceGetXstateRegs implements ptrace(PTRACE_GETREGS, NT_X86_XSTATE) by
+// writing the floating point registers from this state to dst and returning the
+// number of bytes written, which must be less than or equal to maxlen.
+func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet *cpuid.FeatureSet) (int, error) {
+	// N.B. s.x86FPState may contain more state than the application
+	// expects. We only copy the subset that would be in their XSAVE area.
+	ess, _ := featureSet.ExtendedStateSize()
+	f := make([]byte, ess)
+	copy(f, *s)
+	// "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are
+	// reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE
+	// Area". Linux uses the first 8 bytes of this area to store the OS XSTATE
+	// mask. GDB relies on this: see
+	// gdb/x86-linux-nat.c:x86_linux_read_description().
+	usermem.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask())
+	if len(f) > maxlen {
+		f = f[:maxlen]
+	}
+	return dst.Write(f)
+}
+
+// PtraceSetXstateRegs implements ptrace(PTRACE_SETREGS, NT_X86_XSTATE) by
+// reading floating point registers from src and returning the number of bytes
+// read, which must be less than or equal to maxlen.
+func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet *cpuid.FeatureSet) (int, error) {
+	// Allow users to pass an xstate register set smaller than ours (they can
+	// mask bits out of XSTATE_BV), as long as it's at least minXstateBytes.
+	// Also allow users to pass a register set larger than ours; anything after
+	// their ExtendedStateSize will be ignored. (I think Linux technically
+	// permits setting a register set smaller than minXstateBytes, but it has
+	// the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().)
+	if maxlen < minXstateBytes {
+		return 0, unix.EFAULT
+	}
+	ess, _ := featureSet.ExtendedStateSize()
+	if maxlen > int(ess) {
+		maxlen = int(ess)
+	}
+	f := make([]byte, maxlen)
+	if _, err := io.ReadFull(src, f); err != nil {
+		return 0, err
+	}
+	// Force reserved bits in MXCSR to 0. This is consistent with Linux.
+	sanitizeMXCSR(State(f))
+	// Users can't enable *more* XCR0 bits than what we, and the CPU, support.
+	xstateBV := usermem.ByteOrder.Uint64(f[xstateBVOffset:])
+	xstateBV &= featureSet.ValidXCR0Mask()
+	usermem.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV)
+	// Force XCOMP_BV and reserved bytes in the XSAVE header to 0.
+	reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes]
+	for i := range reserved {
+		reserved[i] = 0
+	}
+	return copy(*s, f), nil
+}
+
+// BytePointer returns a pointer to the first byte of the state.
+//
+//go:nosplit
+func (s *State) BytePointer() *byte {
+	return &(*s)[0]
+}
+
+// XSTATE_BV does not exist if FXSAVE is used, but FXSAVE implicitly saves x87
+// and SSE state, so this is the equivalent XSTATE_BV value.
+const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE
+
+// AfterLoad converts the loaded state to the format that compatible with the
+// current processor.
+func (s *State) AfterLoad() {
+	old := *s
+
+	// Recreate the slice. This is done to ensure that it is aligned
+	// appropriately in memory, and large enough to accommodate any new
+	// state that may be saved by the new CPU. Even if extraneous new state
+	// is saved, the state we care about is guaranteed to be a subset of
+	// new state. Later optimizations can use less space when using a
+	// smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has
+	// more info.
+	*s = NewState()
+
+	// x86FPState always contains all the FP state supported by the host.
+	// We may have come from a newer machine that supports additional state
+	// which we cannot restore.
+	//
+	// The x86 FP state areas are backwards compatible, so we can simply
+	// truncate the additional floating point state.
+	//
+	// Applications should not depend on the truncated state because it
+	// should relate only to features that were not exposed in the app
+	// FeatureSet. However, because we do not *prevent* them from using
+	// this state, we must verify here that there is no in-use state
+	// (according to XSTATE_BV) which we do not support.
+	if len(*s) < len(old) {
+		// What do we support?
+		supportedBV := fxsaveBV
+		if fs := cpuid.HostFeatureSet(); fs.UseXsave() {
+			supportedBV = fs.ValidXCR0Mask()
+		}
+
+		// What was in use?
+		savedBV := fxsaveBV
+		if len(old) >= xstateBVOffset+8 {
+			savedBV = usermem.ByteOrder.Uint64(old[xstateBVOffset:])
+		}
+
+		// Supported features must be a superset of saved features.
+		if savedBV&^supportedBV != 0 {
+			panic(ErrLoadingState{supportedFeatures: supportedBV, savedFeatures: savedBV})
+		}
+	}
+
+	// Copy to the new, aligned location.
+	copy(*s, old)
+}
diff --git a/pkg/sentry/arch/fpu/fpu_amd64.s b/pkg/sentry/arch/fpu/fpu_amd64.s
new file mode 100644
index 000000000..6c10336e7
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu_amd64.s
@@ -0,0 +1,136 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "textflag.h"
+
+// MXCSR_DEFAULT is the reset value of MXCSR (Intel SDM Vol. 2, Ch. 3.2
+// "LDMXCSR")
+#define MXCSR_DEFAULT	0x1f80
+
+// MXCSR_OFFSET is the offset in bytes of the MXCSR field from the start of the
+// FXSAVE/XSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE Area")
+#define MXCSR_OFFSET	24
+
+// initX86FPState initializes floating point state.
+//
+// func initX86FPState(data *FloatingPointData, useXsave bool)
+//
+// We need to clear out and initialize an empty fp state area since the sentry,
+// or any previous loader, may have left sensitive information in the floating
+// point registers.
+//
+// Preconditions: data is zeroed.
+TEXT ·initX86FPState(SB), $24-16
+	// Save MXCSR (callee-save)
+	STMXCSR mxcsr-8(SP)
+
+	// Save x87 CW (callee-save)
+	FSTCW cw-16(SP)
+
+	MOVQ fpState+0(FP), DI
+
+	// Do we use xsave?
+	MOVBQZX useXsave+8(FP), AX
+	TESTQ AX, AX
+	JZ no_xsave
+
+	// Use XRSTOR to clear all FP state to an initial state.
+	//
+	// The fpState XSAVE area is zeroed on function entry, meaning
+	// XSTATE_BV is zero.
+	//
+	// "If RFBM[i] = 1 and bit i is clear in the XSTATE_BV field in the
+	// XSAVE header, XRSTOR initializes state component i."
+	//
+	// Initialization is defined in SDM Vol 1, Chapter 13.3. It puts all
+	// the registers in a reasonable initial state, except MXCSR:
+	//
+	// "The MXCSR register is part of state component 1, SSE state (see
+	// Section 13.5.2). However, the standard form of XRSTOR loads the
+	// MXCSR register from memory whenever the RFBM[1] (SSE) or RFBM[2]
+	// (AVX) is set, regardless of the values of XSTATE_BV[1] and
+	// XSTATE_BV[2]."
+
+	// Set MXCSR to the default value.
+	MOVL $MXCSR_DEFAULT, MXCSR_OFFSET(DI)
+
+	// Initialize registers with XRSTOR.
+	MOVL $0xffffffff, AX
+	MOVL $0xffffffff, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x2f // XRSTOR64 0(DI)
+
+	// Now that all the state has been reset, write it back out to the
+	// XSAVE area.
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27 // XSAVE64 0(DI)
+
+	JMP out
+
+no_xsave:
+	// Clear out existing X values.
+	PXOR X0, X0
+	MOVO X0, X1
+	MOVO X0, X2
+	MOVO X0, X3
+	MOVO X0, X4
+	MOVO X0, X5
+	MOVO X0, X6
+	MOVO X0, X7
+	MOVO X0, X8
+	MOVO X0, X9
+	MOVO X0, X10
+	MOVO X0, X11
+	MOVO X0, X12
+	MOVO X0, X13
+	MOVO X0, X14
+	MOVO X0, X15
+
+	// Zero out %rax and store into MMX registers. MMX registers are
+	// an alias of 8x64 bits of the 8x80 bits used for the original
+	// x87 registers. Storing zero into them will reset the FPU registers
+	// to bits [63:0] = 0, [79:64] = 1. But the contents aren't too
+	// important, just the fact that we have reset them to a known value.
+	XORQ AX, AX
+	MOVQ AX, M0
+	MOVQ AX, M1
+	MOVQ AX, M2
+	MOVQ AX, M3
+	MOVQ AX, M4
+	MOVQ AX, M5
+	MOVQ AX, M6
+	MOVQ AX, M7
+
+	// The Go assembler doesn't support FNINIT, so we use BYTE.
+	// This will:
+	//  - Reset FPU control word to 0x037f
+	//  - Clear FPU status word
+	//  - Reset FPU tag word to 0xffff
+	//  - Clear FPU data pointer
+	//  - Clear FPU instruction pointer
+	BYTE $0xDB; BYTE $0xE3; // FNINIT
+
+	// Reset MXCSR.
+	MOVL $MXCSR_DEFAULT, tmpmxcsr-24(SP)
+	LDMXCSR tmpmxcsr-24(SP)
+
+	// Save the floating point state with fxsave.
+	FXSAVE64 0(DI)
+
+out:
+	// Restore MXCSR.
+	LDMXCSR mxcsr-8(SP)
+
+	// Restore x87 CW.
+	FLDCW cw-16(SP)
+
+	RET
diff --git a/pkg/sentry/arch/fpu/fpu_arm64.go b/pkg/sentry/arch/fpu/fpu_arm64.go
new file mode 100644
index 000000000..d2f62631d
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu_arm64.go
@@ -0,0 +1,63 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package fpu
+
+const (
+	// fpsimdMagic is the magic number which is used in fpsimd_context.
+	fpsimdMagic = 0x46508001
+
+	// fpsimdContextSize is the size of fpsimd_context.
+	fpsimdContextSize = 0x210
+)
+
+// initAarch64FPState sets up initial state.
+//
+// Related code in Linux kernel: fpsimd_flush_thread().
+// FPCR = FPCR_RM_RN (0x0 << 22).
+//
+// Currently, aarch64FPState is only a space of 0x210 length for fpstate.
+// The fp head is useless in sentry/ptrace/kvm.
+//
+func initAarch64FPState(data *State) {
+}
+
+func newAarch64FPStateSlice() []byte {
+	return alignedBytes(4096, 16)[:fpsimdContextSize]
+}
+
+// NewState returns an initialized floating point state.
+//
+// The returned state is large enough to store all floating point state
+// supported by host, even if the app won't use much of it due to a restricted
+// FeatureSet.
+func NewState() State {
+	f := State(newAarch64FPStateSlice())
+	initAarch64FPState(&f)
+	return f
+}
+
+// Fork creates and returns an identical copy of the aarch64 floating point state.
+func (s *State) Fork() State {
+	n := State(newAarch64FPStateSlice())
+	copy(n, *s)
+	return n
+}
+
+// BytePointer returns a pointer to the first byte of the state.
+func (s *State) BytePointer() *byte {
+	return &(*s)[0]
+}
```