summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/arch/fpu
diff options
context:
space:
mode:
authorAndrei Vagin <avagin@google.com>2021-03-23 18:44:38 -0700
committergVisor bot <gvisor-bot@google.com>2021-03-23 18:46:37 -0700
commit56a9a13976ad800a8a34b194d35f0169d0a0bb23 (patch)
treecb4b7c4352dc90a8c4c4f469c788fd2c5c6fd0dd /pkg/sentry/arch/fpu
parent960155cdaad49ccea07e45152f124beeb7e7fdcc (diff)
Move the code that manages floating-point state to a separate package
This change is inspired by Adin's cl/355256448. PiperOrigin-RevId: 364695931
Diffstat (limited to 'pkg/sentry/arch/fpu')
-rw-r--r--pkg/sentry/arch/fpu/BUILD21
-rw-r--r--pkg/sentry/arch/fpu/fpu.go54
-rw-r--r--pkg/sentry/arch/fpu/fpu_amd64.go280
-rw-r--r--pkg/sentry/arch/fpu/fpu_amd64.s136
-rw-r--r--pkg/sentry/arch/fpu/fpu_arm64.go63
5 files changed, 554 insertions, 0 deletions
diff --git a/pkg/sentry/arch/fpu/BUILD b/pkg/sentry/arch/fpu/BUILD
new file mode 100644
index 000000000..0a5395267
--- /dev/null
+++ b/pkg/sentry/arch/fpu/BUILD
@@ -0,0 +1,21 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+# Library target for package fpu. Sources for every architecture are listed
+# together; the amd64 and arm64 Go files carry their own build constraints.
+go_library(
+ name = "fpu",
+ srcs = [
+ "fpu.go",
+ "fpu_amd64.go",
+ "fpu_amd64.s",
+ "fpu_arm64.go",
+ ],
+ visibility = ["//:sandbox"],
+ deps = [
+ "//pkg/cpuid",
+ "//pkg/sync",
+ "//pkg/syserror",
+ "//pkg/usermem",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
diff --git a/pkg/sentry/arch/fpu/fpu.go b/pkg/sentry/arch/fpu/fpu.go
new file mode 100644
index 000000000..867d309a3
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu.go
@@ -0,0 +1,54 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fpu provides basic floating point helpers.
+package fpu
+
+import (
+ "fmt"
+ "reflect"
+)
+
+// State represents floating point state.
+//
+// This is a simple byte slice. The architecture-specific files of this
+// package attach methods to it (for example Fork and BytePointer) and provide
+// the NewState constructor.
+type State []byte
+
+// ErrLoadingState indicates a failed restore due to unusable floating point
+// state.
+type ErrLoadingState struct {
+ // supportedFeatures is the bitmap of floating point state components
+ // supported on restore.
+ supportedFeatures uint64
+
+ // savedFeatures is the bitmap of floating point state components that
+ // were in use in the saved state.
+ savedFeatures uint64
+}
+
+// Error implements error.Error. The message reports both the supported and
+// the saved feature bitmaps in hexadecimal.
+func (e ErrLoadingState) Error() string {
+ return fmt.Sprintf("floating point state contains unsupported features; supported: %#x saved: %#x", e.supportedFeatures, e.savedFeatures)
+}
+
+// alignedBytes returns a slice of size bytes, aligned in memory to the given
+// alignment. This is used because we require certain structures to be aligned
+// in a specific way (for example, the X86 floating point data).
+//
+// The returned slice has both length and capacity equal to size. alignment
+// must be non-zero and size+alignment must be greater than one: the address
+// of the first allocated byte is taken, which panics on an empty allocation.
+func alignedBytes(size, alignment uint) []byte {
+ // Over-allocate by alignment-1 bytes so that an aligned address is
+ // guaranteed to fall inside the allocation with size bytes after it.
+ data := make([]byte, size+alignment-1)
+ // offset is the distance of the first byte past the previous aligned
+ // address; zero means the allocation is already aligned.
+ offset := uint(reflect.ValueOf(data).Index(0).Addr().Pointer() % uintptr(alignment))
+ if offset == 0 {
+ return data[:size:size]
+ }
+ // Skip forward to the next aligned address, then cap length and capacity.
+ return data[alignment-offset:][:size:size]
+}
diff --git a/pkg/sentry/arch/fpu/fpu_amd64.go b/pkg/sentry/arch/fpu/fpu_amd64.go
new file mode 100644
index 000000000..3a62f51be
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu_amd64.go
@@ -0,0 +1,280 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64 i386
+
+package fpu
+
+import (
+ "io"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// initX86FPState (defined in fpu_amd64.s) resets the floating point registers
+// to an initial state and saves that state into the area starting at data.
+// When useXsave is true the area is written with XSAVE, otherwise with FXSAVE.
+//
+// Preconditions: the area starting at data is zeroed.
+func initX86FPState(data *byte, useXsave bool)
+
+// newX86FPStateSlice allocates a zeroed State whose length and alignment are
+// the host's extended state size and alignment as reported by cpuid. The
+// capacity of the backing array is at least 4096 bytes.
+func newX86FPStateSlice() State {
+ size, align := cpuid.HostFeatureSet().ExtendedStateSize()
+ capacity := size
+ // Always use at least 4096 bytes.
+ //
+ // For the KVM platform, this state is a fixed 4096 bytes, so make sure
+ // that the underlying array is at _least_ that size otherwise we will
+ // corrupt random memory. This is not a pleasant thing to debug.
+ if capacity < 4096 {
+ capacity = 4096
+ }
+ // Allocate capacity bytes but expose only the first size bytes as length.
+ return alignedBytes(capacity, align)[:size]
+}
+
+// NewState returns an initialized floating point state.
+//
+// The returned state is large enough to store all floating point state
+// supported by the host, even if the app won't use much of it due to a
+// restricted FeatureSet. Since apps may still be able to see state not
+// advertised by CPUID, we must ensure it does not contain any sentry state.
+func NewState() State {
+ f := newX86FPStateSlice()
+ // Reset the FP registers and save that initial state into the new buffer.
+ initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave())
+ return f
+}
+
+// Fork creates and returns a copy of the x86 floating point state in a newly
+// allocated, aligned buffer. At most len(new buffer) bytes of s are copied.
+func (s *State) Fork() State {
+ n := newX86FPStateSlice()
+ copy(n, *s)
+ return n
+}
+
+// ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type
+// manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently,
+// ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area.
+const ptraceFPRegsSize = 512
+
+// PtraceGetFPRegs implements Context.PtraceGetFPRegs by writing the first
+// ptraceFPRegsSize bytes of the state (the FXSAVE area) to dst. It returns the
+// result of dst.Write, or EFAULT if maxlen is smaller than ptraceFPRegsSize.
+func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) {
+ if maxlen < ptraceFPRegsSize {
+ return 0, syserror.EFAULT
+ }
+
+ return dst.Write((*s)[:ptraceFPRegsSize])
+}
+
+// PtraceSetFPRegs implements Context.PtraceSetFPRegs by reading exactly
+// ptraceFPRegsSize bytes from src into the start of the state. It returns the
+// number of bytes read, EFAULT if maxlen is smaller than ptraceFPRegsSize, or
+// the error from reading src (in which case the state is left unmodified).
+func (s *State) PtraceSetFPRegs(src io.Reader, maxlen int) (int, error) {
+ if maxlen < ptraceFPRegsSize {
+ return 0, syserror.EFAULT
+ }
+
+ // Read into a temporary buffer so a short read cannot partially
+ // overwrite the state.
+ var f [ptraceFPRegsSize]byte
+ n, err := io.ReadFull(src, f[:])
+ if err != nil {
+ return 0, err
+ }
+ // Force reserved bits in MXCSR to 0. This is consistent with Linux.
+ sanitizeMXCSR(State(f[:]))
+ // N.B. this only copies the beginning of the FP state, which
+ // corresponds to the FXSAVE area.
+ copy(*s, f[:])
+ return n, nil
+}
+
+const (
+ // mxcsrOffset is the offset in bytes of the MXCSR field from the start of
+ // the FXSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE
+ // Area")
+ mxcsrOffset = 24
+
+ // mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the
+ // start of the FXSAVE area.
+ mxcsrMaskOffset = 28
+)
+
+var (
+ // mxcsrMask is the set of MXCSR bits that may be set. It is computed
+ // lazily by sanitizeMXCSR and must not be read before initMXCSRMask has
+ // run.
+ mxcsrMask uint32
+ // initMXCSRMask guards the one-time initialization of mxcsrMask.
+ initMXCSRMask sync.Once
+)
+
+const (
+ // minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal
+ // to the size of the XSAVE legacy area (512 bytes) plus the size of the
+ // XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's
+ // X86_XSTATE_SSE_SIZE.
+ minXstateBytes = 512 + 64
+
+ // userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD
+ // field in Linux's struct user_xstateregs, which is the type manipulated
+ // by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently,
+ // userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET.
+ userXstateXCR0Offset = 464
+
+ // xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86
+ // XSAVE area.
+ xstateBVOffset = 512
+
+ // xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the
+ // XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is
+ // a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE
+ // header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header".
+ // Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP
+ // exceptions resulting from invalid values; we aren't. Linux also never
+ // uses the compacted format when doing XSAVE and doesn't even define the
+ // compaction extensions to XSAVE as a CPU feature, so for simplicity we
+ // assume no one is using them.
+ xsaveHeaderZeroedOffset = 512 + 8
+ xsaveHeaderZeroedBytes = 64 - 8
+)
+
+// sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. ("FXRSTOR
+// generates a general-protection fault (#GP) in response to an attempt to set
+// any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section
+// 10.5.1.2 "SSE State")
+//
+// f must be at least mxcsrOffset+4 bytes long. f is modified in place.
+func sanitizeMXCSR(f State) {
+ mxcsr := usermem.ByteOrder.Uint32(f[mxcsrOffset:])
+ // Compute the mask of writable MXCSR bits once, by saving an initial
+ // FXSAVE image and reading its MXCSR_MASK field.
+ initMXCSRMask.Do(func() {
+ temp := State(alignedBytes(uint(ptraceFPRegsSize), 16))
+ initX86FPState(&temp[0], false /* useXsave */)
+ mxcsrMask = usermem.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
+ if mxcsrMask == 0 {
+ // "If the value of the MXCSR_MASK field is 00000000H, then the
+ // MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM
+ // Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR
+ // Register"
+ mxcsrMask = 0xffbf
+ }
+ })
+ // Clear every bit outside the mask and store the result back into f.
+ mxcsr &= mxcsrMask
+ usermem.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr)
+}
+
+// PtraceGetXstateRegs implements ptrace(PTRACE_GETREGSET, NT_X86_XSTATE) by
+// writing the floating point registers from this state to dst and returning the
+// number of bytes written, which must be less than or equal to maxlen.
+func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet *cpuid.FeatureSet) (int, error) {
+ // N.B. the state may contain more data than the application expects. We
+ // only copy the subset that would be in their XSAVE area, as sized by
+ // featureSet.
+ ess, _ := featureSet.ExtendedStateSize()
+ f := make([]byte, ess)
+ copy(f, *s)
+ // "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are
+ // reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE
+ // Area". Linux uses the first 8 bytes of this area to store the OS XSTATE
+ // mask. GDB relies on this: see
+ // gdb/x86-linux-nat.c:x86_linux_read_description().
+ usermem.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask())
+ // Silently truncate to the caller's buffer size.
+ if len(f) > maxlen {
+ f = f[:maxlen]
+ }
+ return dst.Write(f)
+}
+
+// PtraceSetXstateRegs implements ptrace(PTRACE_SETREGSET, NT_X86_XSTATE) by
+// reading floating point registers from src and returning the number of bytes
+// copied into the state, which is less than or equal to maxlen.
+//
+// It returns EFAULT if maxlen is smaller than minXstateBytes, ENODEV if the
+// extended state area described by featureSet is too small to contain an
+// XSAVE header, or the error from reading src. On error the state is left
+// unmodified.
+func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet *cpuid.FeatureSet) (int, error) {
+ // Allow users to pass an xstate register set smaller than ours (they can
+ // mask bits out of XSTATE_BV), as long as it's at least minXstateBytes.
+ // Also allow users to pass a register set larger than ours; anything after
+ // their ExtendedStateSize will be ignored. (I think Linux technically
+ // permits setting a register set smaller than minXstateBytes, but it has
+ // the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().)
+ if maxlen < minXstateBytes {
+ return 0, unix.EFAULT
+ }
+ ess, _ := featureSet.ExtendedStateSize()
+ // If the extended state area is smaller than an XSAVE legacy region plus
+ // header (e.g. an FXSAVE-only area), clamping maxlen to it below would
+ // leave no XSAVE header to sanitize and indexing f[xstateBVOffset:] would
+ // panic. Linux's xstateregs_set() fails with ENODEV when XSAVE is not
+ // available, so do the same.
+ if ess < minXstateBytes {
+ return 0, unix.ENODEV
+ }
+ if maxlen > int(ess) {
+ maxlen = int(ess)
+ }
+ // Read into a temporary buffer so a short read cannot partially
+ // overwrite the state.
+ f := make([]byte, maxlen)
+ if _, err := io.ReadFull(src, f); err != nil {
+ return 0, err
+ }
+ // Force reserved bits in MXCSR to 0. This is consistent with Linux.
+ sanitizeMXCSR(State(f))
+ // Users can't enable *more* XCR0 bits than what we, and the CPU, support.
+ xstateBV := usermem.ByteOrder.Uint64(f[xstateBVOffset:])
+ xstateBV &= featureSet.ValidXCR0Mask()
+ usermem.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV)
+ // Force XCOMP_BV and reserved bytes in the XSAVE header to 0.
+ reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes]
+ for i := range reserved {
+ reserved[i] = 0
+ }
+ return copy(*s, f), nil
+}
+
+// BytePointer returns a pointer to the first byte of the state. It panics if
+// the state is empty.
+//
+//go:nosplit
+func (s *State) BytePointer() *byte {
+ return &(*s)[0]
+}
+
+// XSTATE_BV does not exist if FXSAVE is used, but FXSAVE implicitly saves x87
+// and SSE state, so this is the equivalent XSTATE_BV value.
+const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE
+
+// AfterLoad converts the loaded state to a format that is compatible with the
+// current processor.
+//
+// It replaces *s with a freshly allocated, aligned state and copies the loaded
+// bytes into it. It panics with ErrLoadingState if the loaded state is larger
+// than the new one and has in-use features that are not supported here.
+func (s *State) AfterLoad() {
+ old := *s
+
+ // Recreate the slice. This is done to ensure that it is aligned
+ // appropriately in memory, and large enough to accommodate any new
+ // state that may be saved by the new CPU. Even if extraneous new state
+ // is saved, the state we care about is guaranteed to be a subset of
+ // new state. Later optimizations can use less space when using a
+ // smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has
+ // more info.
+ *s = NewState()
+
+ // A State always contains all the FP state supported by the host it was
+ // created on. We may have come from a newer machine that supports
+ // additional state which we cannot restore.
+ //
+ // The x86 FP state areas are backwards compatible, so we can simply
+ // truncate the additional floating point state.
+ //
+ // Applications should not depend on the truncated state because it
+ // should relate only to features that were not exposed in the app
+ // FeatureSet. However, because we do not *prevent* them from using
+ // this state, we must verify here that there is no in-use state
+ // (according to XSTATE_BV) which we do not support.
+ if len(*s) < len(old) {
+ // What do we support?
+ supportedBV := fxsaveBV
+ if fs := cpuid.HostFeatureSet(); fs.UseXsave() {
+ supportedBV = fs.ValidXCR0Mask()
+ }
+
+ // What was in use? A state too short to hold XSTATE_BV is treated
+ // as an FXSAVE image.
+ savedBV := fxsaveBV
+ if len(old) >= xstateBVOffset+8 {
+ savedBV = usermem.ByteOrder.Uint64(old[xstateBVOffset:])
+ }
+
+ // Supported features must be a superset of saved features.
+ if savedBV&^supportedBV != 0 {
+ panic(ErrLoadingState{supportedFeatures: supportedBV, savedFeatures: savedBV})
+ }
+ }
+
+ // Copy to the new, aligned location. If old is longer than the new
+ // state, the excess is dropped.
+ copy(*s, old)
+}
diff --git a/pkg/sentry/arch/fpu/fpu_amd64.s b/pkg/sentry/arch/fpu/fpu_amd64.s
new file mode 100644
index 000000000..6c10336e7
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu_amd64.s
@@ -0,0 +1,136 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "textflag.h"
+
+// MXCSR_DEFAULT is the reset value of MXCSR (Intel SDM Vol. 2, Ch. 3.2
+// "LDMXCSR")
+#define MXCSR_DEFAULT 0x1f80
+
+// MXCSR_OFFSET is the offset in bytes of the MXCSR field from the start of the
+// FXSAVE/XSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE Area")
+#define MXCSR_OFFSET 24
+
+// initX86FPState initializes floating point state.
+//
+// func initX86FPState(data *byte, useXsave bool)
+//
+// We need to clear out and initialize an empty fp state area since the sentry,
+// or any previous loader, may have left sensitive information in the floating
+// point registers.
+//
+// The caller's MXCSR and x87 control word are saved on entry and restored
+// before returning.
+//
+// Preconditions: data is zeroed.
+TEXT ·initX86FPState(SB), $24-16
+ // Save MXCSR (callee-save)
+ STMXCSR mxcsr-8(SP)
+
+ // Save x87 CW (callee-save)
+ FSTCW cw-16(SP)
+
+ // Load the pointer argument. It is named fpState here; the Go
+ // declaration names the same parameter data.
+ MOVQ fpState+0(FP), DI
+
+ // Do we use xsave?
+ MOVBQZX useXsave+8(FP), AX
+ TESTQ AX, AX
+ JZ no_xsave
+
+ // Use XRSTOR to clear all FP state to an initial state.
+ //
+ // The fpState XSAVE area is zeroed on function entry, meaning
+ // XSTATE_BV is zero.
+ //
+ // "If RFBM[i] = 1 and bit i is clear in the XSTATE_BV field in the
+ // XSAVE header, XRSTOR initializes state component i."
+ //
+ // Initialization is defined in SDM Vol 1, Chapter 13.3. It puts all
+ // the registers in a reasonable initial state, except MXCSR:
+ //
+ // "The MXCSR register is part of state component 1, SSE state (see
+ // Section 13.5.2). However, the standard form of XRSTOR loads the
+ // MXCSR register from memory whenever the RFBM[1] (SSE) or RFBM[2]
+ // (AVX) is set, regardless of the values of XSTATE_BV[1] and
+ // XSTATE_BV[2]."
+
+ // Set MXCSR to the default value.
+ MOVL $MXCSR_DEFAULT, MXCSR_OFFSET(DI)
+
+ // Initialize registers with XRSTOR. EDX:EAX is the instruction mask;
+ // setting every bit requests all state components.
+ MOVL $0xffffffff, AX
+ MOVL $0xffffffff, DX
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x2f // XRSTOR64 0(DI)
+
+ // Now that all the state has been reset, write it back out to the
+ // XSAVE area.
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27 // XSAVE64 0(DI)
+
+ JMP out
+
+no_xsave:
+ // Clear out existing X values.
+ PXOR X0, X0
+ MOVO X0, X1
+ MOVO X0, X2
+ MOVO X0, X3
+ MOVO X0, X4
+ MOVO X0, X5
+ MOVO X0, X6
+ MOVO X0, X7
+ MOVO X0, X8
+ MOVO X0, X9
+ MOVO X0, X10
+ MOVO X0, X11
+ MOVO X0, X12
+ MOVO X0, X13
+ MOVO X0, X14
+ MOVO X0, X15
+
+ // Zero out %rax and store into MMX registers. MMX registers are
+ // an alias of 8x64 bits of the 8x80 bits used for the original
+ // x87 registers. Storing zero into them will reset the FPU registers
+ // to bits [63:0] = 0, [79:64] = 1. But the contents aren't too
+ // important, just the fact that we have reset them to a known value.
+ XORQ AX, AX
+ MOVQ AX, M0
+ MOVQ AX, M1
+ MOVQ AX, M2
+ MOVQ AX, M3
+ MOVQ AX, M4
+ MOVQ AX, M5
+ MOVQ AX, M6
+ MOVQ AX, M7
+
+ // The Go assembler doesn't support FNINIT, so we use BYTE.
+ // This will:
+ // - Reset FPU control word to 0x037f
+ // - Clear FPU status word
+ // - Reset FPU tag word to 0xffff
+ // - Clear FPU data pointer
+ // - Clear FPU instruction pointer
+ BYTE $0xDB; BYTE $0xE3; // FNINIT
+
+ // Reset MXCSR, using a scratch stack slot as the memory operand.
+ MOVL $MXCSR_DEFAULT, tmpmxcsr-24(SP)
+ LDMXCSR tmpmxcsr-24(SP)
+
+ // Save the floating point state with fxsave.
+ FXSAVE64 0(DI)
+
+out:
+ // Restore MXCSR.
+ LDMXCSR mxcsr-8(SP)
+
+ // Restore x87 CW.
+ FLDCW cw-16(SP)
+
+ RET
diff --git a/pkg/sentry/arch/fpu/fpu_arm64.go b/pkg/sentry/arch/fpu/fpu_arm64.go
new file mode 100644
index 000000000..d2f62631d
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu_arm64.go
@@ -0,0 +1,63 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package fpu
+
+const (
+ // fpsimdMagic is the magic number which is used in fpsimd_context.
+ fpsimdMagic = 0x46508001
+
+ // fpsimdContextSize is the size of fpsimd_context.
+ fpsimdContextSize = 0x210
+)
+
+// initAarch64FPState sets up the initial state. It is currently a no-op, so
+// the state is left exactly as the caller allocated it.
+//
+// Related code in the Linux kernel: fpsimd_flush_thread().
+// FPCR = FPCR_RM_RN (0x0 << 22).
+//
+// Currently, the state is only a 0x210-byte (fpsimdContextSize) area for
+// fpstate. The fp head is unused in sentry/ptrace/kvm.
+func initAarch64FPState(data *State) {
+}
+
+// newAarch64FPStateSlice allocates a zeroed, 16-byte-aligned buffer of 4096
+// bytes and returns its first fpsimdContextSize bytes; the remaining capacity
+// stays attached to the returned slice.
+func newAarch64FPStateSlice() []byte {
+ return alignedBytes(4096, 16)[:fpsimdContextSize]
+}
+
+// NewState returns an initialized floating point state.
+//
+// The returned state is fpsimdContextSize bytes long and is backed by a
+// freshly allocated, zeroed, 16-byte-aligned buffer.
+func NewState() State {
+ f := State(newAarch64FPStateSlice())
+ initAarch64FPState(&f)
+ return f
+}
+
+// Fork creates and returns a copy of the aarch64 floating point state in a
+// newly allocated, aligned buffer. At most fpsimdContextSize bytes of s are
+// copied.
+func (s *State) Fork() State {
+ n := State(newAarch64FPStateSlice())
+ copy(n, *s)
+ return n
+}
+
+// BytePointer returns a pointer to the first byte of the state. It panics if
+// the state is empty.
+//
+// It is marked nosplit, like the amd64 implementation, so that it may be
+// called from contexts in which the stack must not be grown.
+//
+//go:nosplit
+func (s *State) BytePointer() *byte {
+ return &(*s)[0]
+}