summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/arch/BUILD3
-rw-r--r--pkg/sentry/arch/arch.go34
-rw-r--r--pkg/sentry/arch/arch_aarch64.go71
-rw-r--r--pkg/sentry/arch/arch_amd64.go13
-rw-r--r--pkg/sentry/arch/arch_arm64.go13
-rw-r--r--pkg/sentry/arch/arch_state_x86.go54
-rw-r--r--pkg/sentry/arch/arch_x86.go217
-rw-r--r--pkg/sentry/arch/arch_x86_impl.go3
-rw-r--r--pkg/sentry/arch/fpu/BUILD21
-rw-r--r--pkg/sentry/arch/fpu/fpu.go54
-rw-r--r--pkg/sentry/arch/fpu/fpu_amd64.go280
-rw-r--r--pkg/sentry/arch/fpu/fpu_amd64.s (renamed from pkg/sentry/arch/arch_amd64.s)0
-rw-r--r--pkg/sentry/arch/fpu/fpu_arm64.go63
-rw-r--r--pkg/sentry/arch/signal_amd64.go9
-rw-r--r--pkg/sentry/arch/signal_arm64.go7
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached.go21
-rw-r--r--pkg/sentry/fs/gofer/file.go27
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go20
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go20
-rw-r--r--pkg/sentry/fsimpl/gofer/regular_file.go14
-rw-r--r--pkg/sentry/fsimpl/overlay/filesystem.go21
-rw-r--r--pkg/sentry/fsimpl/overlay/overlay.go21
-rw-r--r--pkg/sentry/fsimpl/overlay/regular_file.go43
-rw-r--r--pkg/sentry/kernel/ptrace_amd64.go10
-rw-r--r--pkg/sentry/platform/kvm/BUILD2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64.go6
-rw-r--r--pkg/sentry/platform/kvm/bluepill_arm64.go10
-rw-r--r--pkg/sentry/platform/kvm/kvm_amd64_test.go2
-rw-r--r--pkg/sentry/platform/kvm/kvm_test.go33
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go13
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64.go3
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64_unsafe.go3
-rw-r--r--pkg/sentry/platform/ptrace/BUILD1
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_unsafe.go9
-rw-r--r--pkg/sentry/syscalls/linux/error.go26
35 files changed, 681 insertions, 466 deletions
diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD
index 85278b389..f660f1614 100644
--- a/pkg/sentry/arch/BUILD
+++ b/pkg/sentry/arch/BUILD
@@ -9,7 +9,6 @@ go_library(
"arch.go",
"arch_aarch64.go",
"arch_amd64.go",
- "arch_amd64.s",
"arch_arm64.go",
"arch_state_x86.go",
"arch_x86.go",
@@ -36,8 +35,8 @@ go_library(
"//pkg/log",
"//pkg/marshal",
"//pkg/marshal/primitive",
+ "//pkg/sentry/arch/fpu",
"//pkg/sentry/limits",
- "//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/arch/arch.go b/pkg/sentry/arch/arch.go
index 3443b9e1b..921151137 100644
--- a/pkg/sentry/arch/arch.go
+++ b/pkg/sentry/arch/arch.go
@@ -24,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/marshal"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -50,12 +51,6 @@ func (a Arch) String() string {
}
}
-// FloatingPointData is a generic type, and will always be passed as a pointer.
-// We rely on the individual arch implementations to meet all the necessary
-// requirements. For example, on x86 the region must be 16-byte aligned and 512
-// bytes in size.
-type FloatingPointData []byte
-
// Context provides architecture-dependent information for a specific thread.
//
// NOTE(b/34169503): Currently we use uintptr here to refer to a generic native
@@ -187,7 +182,7 @@ type Context interface {
ClearSingleStep()
// FloatingPointData will be passed to underlying save routines.
- FloatingPointData() FloatingPointData
+ FloatingPointData() *fpu.State
// NewMmapLayout returns a layout for a new MM, where MinAddr for the
// returned layout must be no lower than min, and MaxAddr for the returned
@@ -221,16 +216,6 @@ type Context interface {
// number of bytes read.
PtraceSetRegs(src io.Reader) (int, error)
- // PtraceGetFPRegs implements ptrace(PTRACE_GETFPREGS) by writing the
- // floating-point registers represented by this Context to addr in dst and
- // returning the number of bytes written.
- PtraceGetFPRegs(dst io.Writer) (int, error)
-
- // PtraceSetFPRegs implements ptrace(PTRACE_SETFPREGS) by reading
- // floating-point registers from src into this Context and returning the
- // number of bytes read.
- PtraceSetFPRegs(src io.Reader) (int, error)
-
// PtraceGetRegSet implements ptrace(PTRACE_GETREGSET) by writing the
// register set given by architecture-defined value regset from this
// Context to dst and returning the number of bytes written, which must be
@@ -365,18 +350,3 @@ func (a SyscallArgument) SizeT() uint {
func (a SyscallArgument) ModeT() uint {
return uint(uint16(a.Value))
}
-
-// ErrFloatingPoint indicates a failed restore due to unusable floating point
-// state.
-type ErrFloatingPoint struct {
- // supported is the supported floating point state.
- supported uint64
-
- // saved is the saved floating point state.
- saved uint64
-}
-
-// Error returns a sensible description of the restore error.
-func (e ErrFloatingPoint) Error() string {
- return fmt.Sprintf("floating point state contains unsupported features; supported: %#x saved: %#x", e.supported, e.saved)
-}
diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go
index 6b81e9708..08789f517 100644
--- a/pkg/sentry/arch/arch_aarch64.go
+++ b/pkg/sentry/arch/arch_aarch64.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -40,65 +41,11 @@ type Registers struct {
const (
// SyscallWidth is the width of insturctions.
SyscallWidth = 4
-
- // fpsimdMagic is the magic number which is used in fpsimd_context.
- fpsimdMagic = 0x46508001
-
- // fpsimdContextSize is the size of fpsimd_context.
- fpsimdContextSize = 0x210
)
// ARMTrapFlag is the mask for the trap flag.
const ARMTrapFlag = uint64(1) << 21
-// aarch64FPState is aarch64 floating point state.
-type aarch64FPState []byte
-
-// initAarch64FPState sets up initial state.
-//
-// Related code in Linux kernel: fpsimd_flush_thread().
-// FPCR = FPCR_RM_RN (0x0 << 22).
-//
-// Currently, aarch64FPState is only a space of 0x210 length for fpstate.
-// The fp head is useless in sentry/ptrace/kvm.
-//
-func initAarch64FPState(data aarch64FPState) {
-}
-
-func newAarch64FPStateSlice() []byte {
- return alignedBytes(4096, 16)[:fpsimdContextSize]
-}
-
-// newAarch64FPState returns an initialized floating point state.
-//
-// The returned state is large enough to store all floating point state
-// supported by host, even if the app won't use much of it due to a restricted
-// FeatureSet.
-func newAarch64FPState() aarch64FPState {
- f := aarch64FPState(newAarch64FPStateSlice())
- initAarch64FPState(f)
- return f
-}
-
-// fork creates and returns an identical copy of the aarch64 floating point state.
-func (f aarch64FPState) fork() aarch64FPState {
- n := aarch64FPState(newAarch64FPStateSlice())
- copy(n, f)
- return n
-}
-
-// FloatingPointData returns the raw data pointer.
-func (f aarch64FPState) FloatingPointData() FloatingPointData {
- return ([]byte)(f)
-}
-
-// NewFloatingPointData returns a new floating point data blob.
-//
-// This is primarily for use in tests.
-func NewFloatingPointData() FloatingPointData {
- return ([]byte)(newAarch64FPState())
-}
-
// State contains the common architecture bits for aarch64 (the build tag of this
// file ensures it's only built on aarch64).
//
@@ -108,7 +55,7 @@ type State struct {
Regs Registers
// Our floating point state.
- aarch64FPState `state:"wait"`
+ fpState fpu.State `state:"wait"`
// FeatureSet is a pointer to the currently active feature set.
FeatureSet *cpuid.FeatureSet
@@ -162,10 +109,10 @@ func (s State) Proto() *rpb.Registers {
// Fork creates and returns an identical copy of the state.
func (s *State) Fork() State {
return State{
- Regs: s.Regs,
- aarch64FPState: s.aarch64FPState.fork(),
- FeatureSet: s.FeatureSet,
- OrigR0: s.OrigR0,
+ Regs: s.Regs,
+ fpState: s.fpState.Fork(),
+ FeatureSet: s.FeatureSet,
+ OrigR0: s.OrigR0,
}
}
@@ -318,10 +265,10 @@ func New(arch Arch, fs *cpuid.FeatureSet) Context {
case ARM64:
return &context64{
State{
- aarch64FPState: newAarch64FPState(),
- FeatureSet: fs,
+ fpState: fpu.NewState(),
+ FeatureSet: fs,
},
- []aarch64FPState(nil),
+ []fpu.State(nil),
}
}
panic(fmt.Sprintf("unknown architecture %v", arch))
diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go
index 15d8ddb40..2571be60f 100644
--- a/pkg/sentry/arch/arch_amd64.go
+++ b/pkg/sentry/arch/arch_amd64.go
@@ -25,6 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -105,7 +106,7 @@ const (
// +stateify savable
type context64 struct {
State
- sigFPState []x86FPState // fpstate to be restored on sigreturn.
+ sigFPState []fpu.State // fpstate to be restored on sigreturn.
}
// Arch implements Context.Arch.
@@ -113,14 +114,18 @@ func (c *context64) Arch() Arch {
return AMD64
}
-func (c *context64) copySigFPState() []x86FPState {
- var sigfps []x86FPState
+func (c *context64) copySigFPState() []fpu.State {
+ var sigfps []fpu.State
for _, s := range c.sigFPState {
- sigfps = append(sigfps, s.fork())
+ sigfps = append(sigfps, s.Fork())
}
return sigfps
}
+func (c *context64) FloatingPointData() *fpu.State {
+ return &c.State.fpState
+}
+
// Fork returns an exact copy of this context.
func (c *context64) Fork() Context {
return &context64{
diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go
index 0c61a3ff7..14ad9483b 100644
--- a/pkg/sentry/arch/arch_arm64.go
+++ b/pkg/sentry/arch/arch_arm64.go
@@ -24,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -79,7 +80,7 @@ const (
// +stateify savable
type context64 struct {
State
- sigFPState []aarch64FPState // fpstate to be restored on sigreturn.
+ sigFPState []fpu.State // fpstate to be restored on sigreturn.
}
// Arch implements Context.Arch.
@@ -87,10 +88,10 @@ func (c *context64) Arch() Arch {
return ARM64
}
-func (c *context64) copySigFPState() []aarch64FPState {
- var sigfps []aarch64FPState
+func (c *context64) copySigFPState() []fpu.State {
+ var sigfps []fpu.State
for _, s := range c.sigFPState {
- sigfps = append(sigfps, s.fork())
+ sigfps = append(sigfps, s.Fork())
}
return sigfps
}
@@ -286,3 +287,7 @@ func (c *context64) PtracePokeUser(addr, data uintptr) error {
// TODO(gvisor.dev/issue/1239): Full ptrace supporting for Arm64.
return nil
}
+
+func (c *context64) FloatingPointData() *fpu.State {
+ return &c.State.fpState
+}
diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go
index 840e53d33..b2b94c304 100644
--- a/pkg/sentry/arch/arch_state_x86.go
+++ b/pkg/sentry/arch/arch_state_x86.go
@@ -16,59 +16,7 @@
package arch
-import (
- "gvisor.dev/gvisor/pkg/cpuid"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// XSTATE_BV does not exist if FXSAVE is used, but FXSAVE implicitly saves x87
-// and SSE state, so this is the equivalent XSTATE_BV value.
-const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE
-
// afterLoadFPState is invoked by afterLoad.
func (s *State) afterLoadFPState() {
- old := s.x86FPState
-
- // Recreate the slice. This is done to ensure that it is aligned
- // appropriately in memory, and large enough to accommodate any new
- // state that may be saved by the new CPU. Even if extraneous new state
- // is saved, the state we care about is guaranteed to be a subset of
- // new state. Later optimizations can use less space when using a
- // smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has
- // more info.
- s.x86FPState = newX86FPState()
-
- // x86FPState always contains all the FP state supported by the host.
- // We may have come from a newer machine that supports additional state
- // which we cannot restore.
- //
- // The x86 FP state areas are backwards compatible, so we can simply
- // truncate the additional floating point state.
- //
- // Applications should not depend on the truncated state because it
- // should relate only to features that were not exposed in the app
- // FeatureSet. However, because we do not *prevent* them from using
- // this state, we must verify here that there is no in-use state
- // (according to XSTATE_BV) which we do not support.
- if len(s.x86FPState) < len(old) {
- // What do we support?
- supportedBV := fxsaveBV
- if fs := cpuid.HostFeatureSet(); fs.UseXsave() {
- supportedBV = fs.ValidXCR0Mask()
- }
-
- // What was in use?
- savedBV := fxsaveBV
- if len(old) >= xstateBVOffset+8 {
- savedBV = usermem.ByteOrder.Uint64(old[xstateBVOffset:])
- }
-
- // Supported features must be a superset of saved features.
- if savedBV&^supportedBV != 0 {
- panic(ErrFloatingPoint{supported: supportedBV, saved: savedBV})
- }
- }
-
- // Copy to the new, aligned location.
- copy(s.x86FPState, old)
+ s.fpState.AfterLoad()
}
diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go
index 91edf0703..e8e52d3a8 100644
--- a/pkg/sentry/arch/arch_x86.go
+++ b/pkg/sentry/arch/arch_x86.go
@@ -24,10 +24,9 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
- "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Registers represents the CPU registers for this architecture.
@@ -111,57 +110,6 @@ var (
X86TrapFlag uint64 = (1 << 8)
)
-// x86FPState is x86 floating point state.
-type x86FPState []byte
-
-// initX86FPState (defined in asm files) sets up initial state.
-func initX86FPState(data *byte, useXsave bool)
-
-func newX86FPStateSlice() []byte {
- size, align := cpuid.HostFeatureSet().ExtendedStateSize()
- capacity := size
- // Always use at least 4096 bytes.
- //
- // For the KVM platform, this state is a fixed 4096 bytes, so make sure
- // that the underlying array is at _least_ that size otherwise we will
- // corrupt random memory. This is not a pleasant thing to debug.
- if capacity < 4096 {
- capacity = 4096
- }
- return alignedBytes(capacity, align)[:size]
-}
-
-// newX86FPState returns an initialized floating point state.
-//
-// The returned state is large enough to store all floating point state
-// supported by host, even if the app won't use much of it due to a restricted
-// FeatureSet. Since they may still be able to see state not advertised by
-// CPUID we must ensure it does not contain any sentry state.
-func newX86FPState() x86FPState {
- f := x86FPState(newX86FPStateSlice())
- initX86FPState(&f.FloatingPointData()[0], cpuid.HostFeatureSet().UseXsave())
- return f
-}
-
-// fork creates and returns an identical copy of the x86 floating point state.
-func (f x86FPState) fork() x86FPState {
- n := x86FPState(newX86FPStateSlice())
- copy(n, f)
- return n
-}
-
-// FloatingPointData returns the raw data pointer.
-func (f x86FPState) FloatingPointData() FloatingPointData {
- return []byte(f)
-}
-
-// NewFloatingPointData returns a new floating point data blob.
-//
-// This is primarily for use in tests.
-func NewFloatingPointData() FloatingPointData {
- return (FloatingPointData)(newX86FPState())
-}
-
// Proto returns a protobuf representation of the system registers in State.
func (s State) Proto() *rpb.Registers {
regs := &rpb.AMD64Registers{
@@ -200,7 +148,7 @@ func (s State) Proto() *rpb.Registers {
func (s *State) Fork() State {
return State{
Regs: s.Regs,
- x86FPState: s.x86FPState.fork(),
+ fpState: s.fpState.Fork(),
FeatureSet: s.FeatureSet,
}
}
@@ -393,149 +341,6 @@ func isValidSegmentBase(reg uint64) bool {
return reg < uint64(maxAddr64)
}
-// ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type
-// manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently,
-// ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area.
-const ptraceFPRegsSize = 512
-
-// PtraceGetFPRegs implements Context.PtraceGetFPRegs.
-func (s *State) PtraceGetFPRegs(dst io.Writer) (int, error) {
- return dst.Write(s.x86FPState[:ptraceFPRegsSize])
-}
-
-// PtraceSetFPRegs implements Context.PtraceSetFPRegs.
-func (s *State) PtraceSetFPRegs(src io.Reader) (int, error) {
- var f [ptraceFPRegsSize]byte
- n, err := io.ReadFull(src, f[:])
- if err != nil {
- return 0, err
- }
- // Force reserved bits in MXCSR to 0. This is consistent with Linux.
- sanitizeMXCSR(x86FPState(f[:]))
- // N.B. this only copies the beginning of the FP state, which
- // corresponds to the FXSAVE area.
- copy(s.x86FPState, f[:])
- return n, nil
-}
-
-const (
- // mxcsrOffset is the offset in bytes of the MXCSR field from the start of
- // the FXSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE
- // Area")
- mxcsrOffset = 24
-
- // mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the
- // start of the FXSAVE area.
- mxcsrMaskOffset = 28
-)
-
-var (
- mxcsrMask uint32
- initMXCSRMask sync.Once
-)
-
-// sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. ("FXRSTOR
-// generates a general-protection fault (#GP) in response to an attempt to set
-// any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section
-// 10.5.1.2 "SSE State")
-func sanitizeMXCSR(f x86FPState) {
- mxcsr := usermem.ByteOrder.Uint32(f[mxcsrOffset:])
- initMXCSRMask.Do(func() {
- temp := x86FPState(alignedBytes(uint(ptraceFPRegsSize), 16))
- initX86FPState(&temp.FloatingPointData()[0], false /* useXsave */)
- mxcsrMask = usermem.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
- if mxcsrMask == 0 {
- // "If the value of the MXCSR_MASK field is 00000000H, then the
- // MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM
- // Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR
- // Register"
- mxcsrMask = 0xffbf
- }
- })
- mxcsr &= mxcsrMask
- usermem.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr)
-}
-
-const (
- // minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal
- // to the size of the XSAVE legacy area (512 bytes) plus the size of the
- // XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's
- // X86_XSTATE_SSE_SIZE.
- minXstateBytes = 512 + 64
-
- // userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD
- // field in Linux's struct user_xstateregs, which is the type manipulated
- // by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently,
- // userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET.
- userXstateXCR0Offset = 464
-
- // xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86
- // XSAVE area.
- xstateBVOffset = 512
-
- // xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the
- // XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is
- // a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE
- // header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header".
- // Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP
- // exceptions resulting from invalid values; we aren't. Linux also never
- // uses the compacted format when doing XSAVE and doesn't even define the
- // compaction extensions to XSAVE as a CPU feature, so for simplicity we
- // assume no one is using them.
- xsaveHeaderZeroedOffset = 512 + 8
- xsaveHeaderZeroedBytes = 64 - 8
-)
-
-func (s *State) ptraceGetXstateRegs(dst io.Writer, maxlen int) (int, error) {
- // N.B. s.x86FPState may contain more state than the application
- // expects. We only copy the subset that would be in their XSAVE area.
- ess, _ := s.FeatureSet.ExtendedStateSize()
- f := make([]byte, ess)
- copy(f, s.x86FPState)
- // "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are
- // reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE
- // Area". Linux uses the first 8 bytes of this area to store the OS XSTATE
- // mask. GDB relies on this: see
- // gdb/x86-linux-nat.c:x86_linux_read_description().
- usermem.ByteOrder.PutUint64(f[userXstateXCR0Offset:], s.FeatureSet.ValidXCR0Mask())
- if len(f) > maxlen {
- f = f[:maxlen]
- }
- return dst.Write(f)
-}
-
-func (s *State) ptraceSetXstateRegs(src io.Reader, maxlen int) (int, error) {
- // Allow users to pass an xstate register set smaller than ours (they can
- // mask bits out of XSTATE_BV), as long as it's at least minXstateBytes.
- // Also allow users to pass a register set larger than ours; anything after
- // their ExtendedStateSize will be ignored. (I think Linux technically
- // permits setting a register set smaller than minXstateBytes, but it has
- // the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().)
- if maxlen < minXstateBytes {
- return 0, unix.EFAULT
- }
- ess, _ := s.FeatureSet.ExtendedStateSize()
- if maxlen > int(ess) {
- maxlen = int(ess)
- }
- f := make([]byte, maxlen)
- if _, err := io.ReadFull(src, f); err != nil {
- return 0, err
- }
- // Force reserved bits in MXCSR to 0. This is consistent with Linux.
- sanitizeMXCSR(x86FPState(f))
- // Users can't enable *more* XCR0 bits than what we, and the CPU, support.
- xstateBV := usermem.ByteOrder.Uint64(f[xstateBVOffset:])
- xstateBV &= s.FeatureSet.ValidXCR0Mask()
- usermem.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV)
- // Force XCOMP_BV and reserved bytes in the XSAVE header to 0.
- reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes]
- for i := range reserved {
- reserved[i] = 0
- }
- return copy(s.x86FPState, f), nil
-}
-
// Register sets defined in include/uapi/linux/elf.h.
const (
_NT_PRSTATUS = 1
@@ -552,12 +357,9 @@ func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int,
}
return s.PtraceGetRegs(dst)
case _NT_PRFPREG:
- if maxlen < ptraceFPRegsSize {
- return 0, syserror.EFAULT
- }
- return s.PtraceGetFPRegs(dst)
+ return s.fpState.PtraceGetFPRegs(dst, maxlen)
case _NT_X86_XSTATE:
- return s.ptraceGetXstateRegs(dst, maxlen)
+ return s.fpState.PtraceGetXstateRegs(dst, maxlen, s.FeatureSet)
default:
return 0, syserror.EINVAL
}
@@ -572,12 +374,9 @@ func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int,
}
return s.PtraceSetRegs(src)
case _NT_PRFPREG:
- if maxlen < ptraceFPRegsSize {
- return 0, syserror.EFAULT
- }
- return s.PtraceSetFPRegs(src)
+ return s.fpState.PtraceSetFPRegs(src, maxlen)
case _NT_X86_XSTATE:
- return s.ptraceSetXstateRegs(src, maxlen)
+ return s.fpState.PtraceSetXstateRegs(src, maxlen, s.FeatureSet)
default:
return 0, syserror.EINVAL
}
@@ -609,10 +408,10 @@ func New(arch Arch, fs *cpuid.FeatureSet) Context {
case AMD64:
return &context64{
State{
- x86FPState: newX86FPState(),
+ fpState: fpu.NewState(),
FeatureSet: fs,
},
- []x86FPState(nil),
+ []fpu.State(nil),
}
}
panic(fmt.Sprintf("unknown architecture %v", arch))
diff --git a/pkg/sentry/arch/arch_x86_impl.go b/pkg/sentry/arch/arch_x86_impl.go
index 0c73fcbfb..5d7b99bd9 100644
--- a/pkg/sentry/arch/arch_x86_impl.go
+++ b/pkg/sentry/arch/arch_x86_impl.go
@@ -18,6 +18,7 @@ package arch
import (
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
)
// State contains the common architecture bits for X86 (the build tag of this
@@ -29,7 +30,7 @@ type State struct {
Regs Registers
// Our floating point state.
- x86FPState `state:"wait"`
+ fpState fpu.State `state:"wait"`
// FeatureSet is a pointer to the currently active feature set.
FeatureSet *cpuid.FeatureSet
diff --git a/pkg/sentry/arch/fpu/BUILD b/pkg/sentry/arch/fpu/BUILD
new file mode 100644
index 000000000..0a5395267
--- /dev/null
+++ b/pkg/sentry/arch/fpu/BUILD
@@ -0,0 +1,21 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "fpu",
+ srcs = [
+ "fpu.go",
+ "fpu_amd64.go",
+ "fpu_amd64.s",
+ "fpu_arm64.go",
+ ],
+ visibility = ["//:sandbox"],
+ deps = [
+ "//pkg/cpuid",
+ "//pkg/sync",
+ "//pkg/syserror",
+ "//pkg/usermem",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
diff --git a/pkg/sentry/arch/fpu/fpu.go b/pkg/sentry/arch/fpu/fpu.go
new file mode 100644
index 000000000..867d309a3
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu.go
@@ -0,0 +1,54 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fpu provides basic floating point helpers.
+package fpu
+
+import (
+ "fmt"
+ "reflect"
+)
+
+// State represents floating point state.
+//
+// This is a simple byte slice, but may have architecture-specific methods
+// attached to it.
+type State []byte
+
+// ErrLoadingState indicates a failed restore due to unusable floating point
+// state.
+type ErrLoadingState struct {
+ // supported is the supported floating point state.
+ supportedFeatures uint64
+
+ // saved is the saved floating point state.
+ savedFeatures uint64
+}
+
+// Error returns a sensible description of the restore error.
+func (e ErrLoadingState) Error() string {
+ return fmt.Sprintf("floating point state contains unsupported features; supported: %#x saved: %#x", e.supportedFeatures, e.savedFeatures)
+}
+
+// alignedBytes returns a slice of size bytes, aligned in memory to the given
+// alignment. This is used because we require certain structures to be aligned
+// in a specific way (for example, the X86 floating point data).
+func alignedBytes(size, alignment uint) []byte {
+ data := make([]byte, size+alignment-1)
+ offset := uint(reflect.ValueOf(data).Index(0).Addr().Pointer() % uintptr(alignment))
+ if offset == 0 {
+ return data[:size:size]
+ }
+ return data[alignment-offset:][:size:size]
+}
diff --git a/pkg/sentry/arch/fpu/fpu_amd64.go b/pkg/sentry/arch/fpu/fpu_amd64.go
new file mode 100644
index 000000000..3a62f51be
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu_amd64.go
@@ -0,0 +1,280 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64 i386
+
+package fpu
+
+import (
+ "io"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// initX86FPState (defined in asm files) sets up initial state.
+func initX86FPState(data *byte, useXsave bool)
+
+func newX86FPStateSlice() State {
+ size, align := cpuid.HostFeatureSet().ExtendedStateSize()
+ capacity := size
+ // Always use at least 4096 bytes.
+ //
+ // For the KVM platform, this state is a fixed 4096 bytes, so make sure
+ // that the underlying array is at _least_ that size otherwise we will
+ // corrupt random memory. This is not a pleasant thing to debug.
+ if capacity < 4096 {
+ capacity = 4096
+ }
+ return alignedBytes(capacity, align)[:size]
+}
+
+// NewState returns an initialized floating point state.
+//
+// The returned state is large enough to store all floating point state
+// supported by host, even if the app won't use much of it due to a restricted
+// FeatureSet. Since they may still be able to see state not advertised by
+// CPUID we must ensure it does not contain any sentry state.
+func NewState() State {
+ f := newX86FPStateSlice()
+ initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave())
+ return f
+}
+
+// Fork creates and returns an identical copy of the x86 floating point state.
+func (s *State) Fork() State {
+ n := newX86FPStateSlice()
+ copy(n, *s)
+ return n
+}
+
+// ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type
+// manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently,
+// ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area.
+const ptraceFPRegsSize = 512
+
+// PtraceGetFPRegs implements Context.PtraceGetFPRegs.
+func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) {
+ if maxlen < ptraceFPRegsSize {
+ return 0, syserror.EFAULT
+ }
+
+ return dst.Write((*s)[:ptraceFPRegsSize])
+}
+
+// PtraceSetFPRegs implements Context.PtraceSetFPRegs.
+func (s *State) PtraceSetFPRegs(src io.Reader, maxlen int) (int, error) {
+ if maxlen < ptraceFPRegsSize {
+ return 0, syserror.EFAULT
+ }
+
+ var f [ptraceFPRegsSize]byte
+ n, err := io.ReadFull(src, f[:])
+ if err != nil {
+ return 0, err
+ }
+ // Force reserved bits in MXCSR to 0. This is consistent with Linux.
+ sanitizeMXCSR(State(f[:]))
+ // N.B. this only copies the beginning of the FP state, which
+ // corresponds to the FXSAVE area.
+ copy(*s, f[:])
+ return n, nil
+}
+
+const (
+ // mxcsrOffset is the offset in bytes of the MXCSR field from the start of
+ // the FXSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE
+ // Area")
+ mxcsrOffset = 24
+
+ // mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the
+ // start of the FXSAVE area.
+ mxcsrMaskOffset = 28
+)
+
+var (
+ mxcsrMask uint32
+ initMXCSRMask sync.Once
+)
+
+const (
+ // minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal
+ // to the size of the XSAVE legacy area (512 bytes) plus the size of the
+ // XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's
+ // X86_XSTATE_SSE_SIZE.
+ minXstateBytes = 512 + 64
+
+ // userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD
+ // field in Linux's struct user_xstateregs, which is the type manipulated
+ // by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently,
+ // userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET.
+ userXstateXCR0Offset = 464
+
+ // xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86
+ // XSAVE area.
+ xstateBVOffset = 512
+
+ // xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the
+ // XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is
+ // a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE
+ // header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header".
+ // Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP
+ // exceptions resulting from invalid values; we aren't. Linux also never
+ // uses the compacted format when doing XSAVE and doesn't even define the
+ // compaction extensions to XSAVE as a CPU feature, so for simplicity we
+ // assume no one is using them.
+ xsaveHeaderZeroedOffset = 512 + 8
+ xsaveHeaderZeroedBytes = 64 - 8
+)
+
+// sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. ("FXRSTOR
+// generates a general-protection fault (#GP) in response to an attempt to set
+// any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section
+// 10.5.1.2 "SSE State")
+func sanitizeMXCSR(f State) {
+ mxcsr := usermem.ByteOrder.Uint32(f[mxcsrOffset:])
+ initMXCSRMask.Do(func() {
+ temp := State(alignedBytes(uint(ptraceFPRegsSize), 16))
+ initX86FPState(&temp[0], false /* useXsave */)
+ mxcsrMask = usermem.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
+ if mxcsrMask == 0 {
+ // "If the value of the MXCSR_MASK field is 00000000H, then the
+ // MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM
+ // Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR
+ // Register"
+ mxcsrMask = 0xffbf
+ }
+ })
+ mxcsr &= mxcsrMask
+ usermem.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr)
+}
+
+// PtraceGetXstateRegs implements ptrace(PTRACE_GETREGSET, NT_X86_XSTATE) by
+// writing the floating point registers from this state to dst and returning the
+// number of bytes written, which must be less than or equal to maxlen.
+func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet *cpuid.FeatureSet) (int, error) {
+ // N.B. s.x86FPState may contain more state than the application
+ // expects. We only copy the subset that would be in their XSAVE area.
+ ess, _ := featureSet.ExtendedStateSize()
+ f := make([]byte, ess)
+ copy(f, *s)
+ // "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are
+ // reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE
+ // Area". Linux uses the first 8 bytes of this area to store the OS XSTATE
+ // mask. GDB relies on this: see
+ // gdb/x86-linux-nat.c:x86_linux_read_description().
+ usermem.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask())
+ if len(f) > maxlen {
+ f = f[:maxlen]
+ }
+ return dst.Write(f)
+}
+
+// PtraceSetXstateRegs implements ptrace(PTRACE_SETREGSET, NT_X86_XSTATE) by
+// reading floating point registers from src and returning the number of bytes
+// read, which must be less than or equal to maxlen.
+func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet *cpuid.FeatureSet) (int, error) {
+ // Allow users to pass an xstate register set smaller than ours (they can
+ // mask bits out of XSTATE_BV), as long as it's at least minXstateBytes.
+ // Also allow users to pass a register set larger than ours; anything after
+ // their ExtendedStateSize will be ignored. (I think Linux technically
+ // permits setting a register set smaller than minXstateBytes, but it has
+ // the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().)
+ if maxlen < minXstateBytes {
+ return 0, unix.EFAULT
+ }
+ ess, _ := featureSet.ExtendedStateSize()
+ if maxlen > int(ess) {
+ maxlen = int(ess)
+ }
+ f := make([]byte, maxlen)
+ if _, err := io.ReadFull(src, f); err != nil {
+ return 0, err
+ }
+ // Force reserved bits in MXCSR to 0. This is consistent with Linux.
+ sanitizeMXCSR(State(f))
+ // Users can't enable *more* XCR0 bits than what we, and the CPU, support.
+ xstateBV := usermem.ByteOrder.Uint64(f[xstateBVOffset:])
+ xstateBV &= featureSet.ValidXCR0Mask()
+ usermem.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV)
+ // Force XCOMP_BV and reserved bytes in the XSAVE header to 0.
+ reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes]
+ for i := range reserved {
+ reserved[i] = 0
+ }
+ return copy(*s, f), nil
+}
+
+// BytePointer returns a pointer to the first byte of the state.
+//
+//go:nosplit
+func (s *State) BytePointer() *byte {
+ return &(*s)[0]
+}
+
+// XSTATE_BV does not exist if FXSAVE is used, but FXSAVE implicitly saves x87
+// and SSE state, so this is the equivalent XSTATE_BV value.
+const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE
+
+// AfterLoad converts the loaded state to a format that is compatible with the
+// current processor.
+func (s *State) AfterLoad() {
+ old := *s
+
+ // Recreate the slice. This is done to ensure that it is aligned
+ // appropriately in memory, and large enough to accommodate any new
+ // state that may be saved by the new CPU. Even if extraneous new state
+ // is saved, the state we care about is guaranteed to be a subset of
+ // new state. Later optimizations can use less space when using a
+ // smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has
+ // more info.
+ *s = NewState()
+
+ // x86FPState always contains all the FP state supported by the host.
+ // We may have come from a newer machine that supports additional state
+ // which we cannot restore.
+ //
+ // The x86 FP state areas are backwards compatible, so we can simply
+ // truncate the additional floating point state.
+ //
+ // Applications should not depend on the truncated state because it
+ // should relate only to features that were not exposed in the app
+ // FeatureSet. However, because we do not *prevent* them from using
+ // this state, we must verify here that there is no in-use state
+ // (according to XSTATE_BV) which we do not support.
+ if len(*s) < len(old) {
+ // What do we support?
+ supportedBV := fxsaveBV
+ if fs := cpuid.HostFeatureSet(); fs.UseXsave() {
+ supportedBV = fs.ValidXCR0Mask()
+ }
+
+ // What was in use?
+ savedBV := fxsaveBV
+ if len(old) >= xstateBVOffset+8 {
+ savedBV = usermem.ByteOrder.Uint64(old[xstateBVOffset:])
+ }
+
+ // Supported features must be a superset of saved features.
+ if savedBV&^supportedBV != 0 {
+ panic(ErrLoadingState{supportedFeatures: supportedBV, savedFeatures: savedBV})
+ }
+ }
+
+ // Copy to the new, aligned location.
+ copy(*s, old)
+}
diff --git a/pkg/sentry/arch/arch_amd64.s b/pkg/sentry/arch/fpu/fpu_amd64.s
index 6c10336e7..6c10336e7 100644
--- a/pkg/sentry/arch/arch_amd64.s
+++ b/pkg/sentry/arch/fpu/fpu_amd64.s
diff --git a/pkg/sentry/arch/fpu/fpu_arm64.go b/pkg/sentry/arch/fpu/fpu_arm64.go
new file mode 100644
index 000000000..d2f62631d
--- /dev/null
+++ b/pkg/sentry/arch/fpu/fpu_arm64.go
@@ -0,0 +1,63 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package fpu
+
+const (
+ // fpsimdMagic is the magic number which is used in fpsimd_context.
+ fpsimdMagic = 0x46508001
+
+ // fpsimdContextSize is the size of fpsimd_context.
+ fpsimdContextSize = 0x210
+)
+
+// initAarch64FPState sets up initial state.
+//
+// Related code in Linux kernel: fpsimd_flush_thread().
+// FPCR = FPCR_RM_RN (0x0 << 22).
+//
+// Currently, State is only a 0x210-byte buffer for fpstate.
+// The FP header is unused in sentry/ptrace/kvm.
+//
+func initAarch64FPState(data *State) {
+}
+
+func newAarch64FPStateSlice() []byte {
+ return alignedBytes(4096, 16)[:fpsimdContextSize]
+}
+
+// NewState returns an initialized floating point state.
+//
+// The returned state is large enough to store all floating point state
+// supported by host, even if the app won't use much of it due to a restricted
+// FeatureSet.
+func NewState() State {
+ f := State(newAarch64FPStateSlice())
+ initAarch64FPState(&f)
+ return f
+}
+
+// Fork creates and returns an identical copy of the aarch64 floating point state.
+func (s *State) Fork() State {
+ n := State(newAarch64FPStateSlice())
+ copy(n, *s)
+ return n
+}
+
+// BytePointer returns a pointer to the first byte of the state.
+func (s *State) BytePointer() *byte {
+ return &(*s)[0]
+}
diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go
index e6557cab6..ee3743483 100644
--- a/pkg/sentry/arch/signal_amd64.go
+++ b/pkg/sentry/arch/signal_amd64.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/marshal/primitive"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -98,7 +99,7 @@ func (c *context64) NewSignalStack() NativeSignalStack {
const _FP_XSTATE_MAGIC2_SIZE = 4
func (c *context64) fpuFrameSize() (size int, useXsave bool) {
- size = len(c.x86FPState)
+ size = len(c.fpState)
if size > 512 {
// Make room for the magic cookie at the end of the xsave frame.
size += _FP_XSTATE_MAGIC2_SIZE
@@ -226,10 +227,10 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
c.Regs.Ss = userDS
// Save the thread's floating point state.
- c.sigFPState = append(c.sigFPState, c.x86FPState)
+ c.sigFPState = append(c.sigFPState, c.fpState)
// Signal handler gets a clean floating point state.
- c.x86FPState = newX86FPState()
+ c.fpState = fpu.NewState()
return nil
}
@@ -273,7 +274,7 @@ func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalSt
// Restore floating point state.
l := len(c.sigFPState)
if l > 0 {
- c.x86FPState = c.sigFPState[l-1]
+ c.fpState = c.sigFPState[l-1]
// NOTE(cl/133042258): State save requires that any slice
// elements from '[len:cap]' to be zero value.
c.sigFPState[l-1] = nil
diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go
index 4491008c2..53281dcba 100644
--- a/pkg/sentry/arch/signal_arm64.go
+++ b/pkg/sentry/arch/signal_arm64.go
@@ -20,6 +20,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -139,9 +140,9 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
c.Regs.Regs[30] = uint64(act.Restorer)
// Save the thread's floating point state.
- c.sigFPState = append(c.sigFPState, c.aarch64FPState)
+ c.sigFPState = append(c.sigFPState, c.fpState)
// Signal handler gets a clean floating point state.
- c.aarch64FPState = newAarch64FPState()
+ c.fpState = fpu.NewState()
return nil
}
@@ -166,7 +167,7 @@ func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalSt
// Restore floating point state.
l := len(c.sigFPState)
if l > 0 {
- c.aarch64FPState = c.sigFPState[l-1]
+ c.fpState = c.sigFPState[l-1]
// NOTE(cl/133042258): State save requires that any slice
// elements from '[len:cap]' to be zero value.
c.sigFPState[l-1] = nil
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index 82eda3e43..0ed7aafa5 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -380,16 +380,17 @@ func (c *CachingInodeOperations) Allocate(ctx context.Context, offset, length in
return nil
}
-// WriteOut implements fs.InodeOperations.WriteOut.
-func (c *CachingInodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
+// WriteDirtyPagesAndAttrs will write the dirty pages and attributes to the
+// gofer without calling Fsync on the remote file.
+func (c *CachingInodeOperations) WriteDirtyPagesAndAttrs(ctx context.Context, inode *fs.Inode) error {
c.attrMu.Lock()
+ defer c.attrMu.Unlock()
+ c.dataMu.Lock()
+ defer c.dataMu.Unlock()
// Write dirty pages back.
- c.dataMu.Lock()
err := SyncDirtyAll(ctx, &c.cache, &c.dirty, uint64(c.attr.Size), c.mfp.MemoryFile(), c.backingFile.WriteFromBlocksAt)
- c.dataMu.Unlock()
if err != nil {
- c.attrMu.Unlock()
return err
}
@@ -399,12 +400,18 @@ func (c *CachingInodeOperations) WriteOut(ctx context.Context, inode *fs.Inode)
// Write out cached attributes.
if err := c.backingFile.SetMaskedAttributes(ctx, c.dirtyAttr, c.attr, false); err != nil {
- c.attrMu.Unlock()
return err
}
c.dirtyAttr = fs.AttrMask{}
- c.attrMu.Unlock()
+ return nil
+}
+
+// WriteOut implements fs.InodeOperations.WriteOut.
+func (c *CachingInodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
+ if err := c.WriteDirtyPagesAndAttrs(ctx, inode); err != nil {
+ return err
+ }
// Fsync the remote file.
return c.backingFile.Sync(ctx)
diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go
index 06d450ba6..8f5a87120 100644
--- a/pkg/sentry/fs/gofer/file.go
+++ b/pkg/sentry/fs/gofer/file.go
@@ -204,20 +204,8 @@ func (f *fileOperations) readdirAll(ctx context.Context) (map[string]fs.DentAttr
return entries, nil
}
-// maybeSync will call FSync on the file if either the cache policy or file
-// flags require it.
+// maybeSync will call FSync on the file if the file flags require it.
func (f *fileOperations) maybeSync(ctx context.Context, file *fs.File, offset, n int64) error {
- if n == 0 {
- // Nothing to sync.
- return nil
- }
-
- if f.inodeOperations.session().cachePolicy.writeThrough(file.Dirent.Inode) {
- // Call WriteOut directly, as some "writethrough" filesystems
- // do not support sync.
- return f.inodeOperations.cachingInodeOps.WriteOut(ctx, file.Dirent.Inode)
- }
-
flags := file.Flags()
var syncType fs.SyncType
switch {
@@ -254,6 +242,19 @@ func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.I
n, err = src.CopyInTo(ctx, f.handles.readWriterAt(ctx, offset))
}
+ if n == 0 {
+ // Nothing written. We are done.
+ return 0, err
+ }
+
+ // Write the dirty pages and attributes if cache policy tells us to.
+ if f.inodeOperations.session().cachePolicy.writeThrough(file.Dirent.Inode) {
+ if werr := f.inodeOperations.cachingInodeOps.WriteDirtyPagesAndAttrs(ctx, file.Dirent.Inode); werr != nil {
+ // Report no bytes written since the write failed.
+ return 0, werr
+ }
+ }
+
// We may need to sync the written bytes.
if syncErr := f.maybeSync(ctx, file, offset, n); syncErr != nil {
// Sync failed. Report 0 bytes written, since none of them are
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index c34451269..43c3c5a2d 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -783,7 +783,15 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
creds := rp.Credentials()
return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string, _ **[]*dentry) error {
- if _, err := parent.file.mkdir(ctx, name, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)); err != nil {
+ // If the parent is a setgid directory, use the parent's GID
+ // rather than the caller's and enable setgid.
+ kgid := creds.EffectiveKGID
+ mode := opts.Mode
+ if atomic.LoadUint32(&parent.mode)&linux.S_ISGID != 0 {
+ kgid = auth.KGID(atomic.LoadUint32(&parent.gid))
+ mode |= linux.S_ISGID
+ }
+ if _, err := parent.file.mkdir(ctx, name, p9.FileMode(mode), (p9.UID)(creds.EffectiveKUID), p9.GID(kgid)); err != nil {
if !opts.ForSyntheticMountpoint || err == syserror.EEXIST {
return err
}
@@ -1145,7 +1153,15 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
name := rp.Component()
// We only want the access mode for creating the file.
createFlags := p9.OpenFlags(opts.Flags) & p9.OpenFlagsModeMask
- fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, createFlags, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
+
+ // If the parent is a setgid directory, use the parent's GID rather
+ // than the caller's.
+ kgid := creds.EffectiveKGID
+ if atomic.LoadUint32(&d.mode)&linux.S_ISGID != 0 {
+ kgid = auth.KGID(atomic.LoadUint32(&d.gid))
+ }
+
+ fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, createFlags, p9.FileMode(opts.Mode), (p9.UID)(creds.EffectiveKUID), p9.GID(kgid))
if err != nil {
dirfile.close(ctx)
return nil, err
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 71569dc65..692da02c1 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1102,10 +1102,26 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs
d.metadataMu.Lock()
defer d.metadataMu.Unlock()
+
+ // As with Linux, if the UID, GID, or file size is changing, we have to
+ // clear permission bits. Note that when set, clearSGID causes
+ // permissions to be updated, but does not modify stat.Mask, as
+ // modification would cause an extra inotify flag to be set.
+ clearSGID := stat.Mask&linux.STATX_UID != 0 && stat.UID != atomic.LoadUint32(&d.uid) ||
+ stat.Mask&linux.STATX_GID != 0 && stat.GID != atomic.LoadUint32(&d.gid) ||
+ stat.Mask&linux.STATX_SIZE != 0
+ if clearSGID {
+ if stat.Mask&linux.STATX_MODE != 0 {
+ stat.Mode = uint16(vfs.ClearSUIDAndSGID(uint32(stat.Mode)))
+ } else {
+ stat.Mode = uint16(vfs.ClearSUIDAndSGID(atomic.LoadUint32(&d.mode)))
+ }
+ }
+
if !d.isSynthetic() {
if stat.Mask != 0 {
if err := d.file.setAttr(ctx, p9.SetAttrMask{
- Permissions: stat.Mask&linux.STATX_MODE != 0,
+ Permissions: stat.Mask&linux.STATX_MODE != 0 || clearSGID,
UID: stat.Mask&linux.STATX_UID != 0,
GID: stat.Mask&linux.STATX_GID != 0,
Size: stat.Mask&linux.STATX_SIZE != 0,
@@ -1140,7 +1156,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs
return nil
}
}
- if stat.Mask&linux.STATX_MODE != 0 {
+ if stat.Mask&linux.STATX_MODE != 0 || clearSGID {
atomic.StoreUint32(&d.mode, d.fileType()|uint32(stat.Mode))
}
if stat.Mask&linux.STATX_UID != 0 {
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index 283b220bb..4f1ad0c88 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -266,6 +266,20 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
return 0, offset, err
}
}
+
+ // As with Linux, writing clears the setuid and setgid bits.
+ if n > 0 {
+ oldMode := atomic.LoadUint32(&d.mode)
+ // If setuid or setgid were set, update d.mode and propagate
+ // changes to the host.
+ if newMode := vfs.ClearSUIDAndSGID(oldMode); newMode != oldMode {
+ atomic.StoreUint32(&d.mode, newMode)
+ if err := d.file.setAttr(ctx, p9.SetAttrMask{Permissions: true}, p9.SetAttr{Permissions: p9.FileMode(newMode)}); err != nil {
+ return 0, offset, err
+ }
+ }
+ }
+
return n, offset + n, nil
}
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 84e37f793..46c500427 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -689,13 +689,9 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
}
return err
}
- creds := rp.Credentials()
+
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
- Stat: linux.Statx{
- Mask: linux.STATX_UID | linux.STATX_GID,
- UID: uint32(creds.EffectiveKUID),
- GID: uint32(creds.EffectiveKGID),
- },
+ Stat: parent.newChildOwnerStat(opts.Mode, rp.Credentials()),
}); err != nil {
if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt metadata update failure: %v", cleanupErr))
@@ -750,11 +746,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
}
creds := rp.Credentials()
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
- Stat: linux.Statx{
- Mask: linux.STATX_UID | linux.STATX_GID,
- UID: uint32(creds.EffectiveKUID),
- GID: uint32(creds.EffectiveKGID),
- },
+ Stat: parent.newChildOwnerStat(opts.Mode, creds),
}); err != nil {
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after MknodAt metadata update failure: %v", cleanupErr))
@@ -963,14 +955,11 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
}
return nil, err
}
+
// Change the file's owner to the caller. We can't use upperFD.SetStat()
// because it will pick up creds from ctx.
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
- Stat: linux.Statx{
- Mask: linux.STATX_UID | linux.STATX_GID,
- UID: uint32(creds.EffectiveKUID),
- GID: uint32(creds.EffectiveKGID),
- },
+ Stat: parent.newChildOwnerStat(opts.Mode, creds),
}); err != nil {
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) metadata update failure: %v", cleanupErr))
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index 58680bc80..454c20d4f 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -749,6 +749,27 @@ func (d *dentry) mayDelete(creds *auth.Credentials, child *dentry) error {
)
}
+// newChildOwnerStat returns a Statx for configuring the UID, GID, and mode of
+// children.
+func (d *dentry) newChildOwnerStat(mode linux.FileMode, creds *auth.Credentials) linux.Statx {
+ stat := linux.Statx{
+ Mask: uint32(linux.STATX_UID | linux.STATX_GID),
+ UID: uint32(creds.EffectiveKUID),
+ GID: uint32(creds.EffectiveKGID),
+ }
+ // Set GID and possibly the SGID bit if the parent is an SGID directory.
+ d.copyMu.RLock()
+ defer d.copyMu.RUnlock()
+ if atomic.LoadUint32(&d.mode)&linux.ModeSetGID == linux.ModeSetGID {
+ stat.GID = atomic.LoadUint32(&d.gid)
+ if stat.Mode&linux.ModeDirectory == linux.ModeDirectory {
+ stat.Mode = uint16(mode) | linux.ModeSetGID
+ stat.Mask |= linux.STATX_MODE
+ }
+ }
+ return stat
+}
+
// fileDescription is embedded by overlay implementations of
// vfs.FileDescriptionImpl.
//
diff --git a/pkg/sentry/fsimpl/overlay/regular_file.go b/pkg/sentry/fsimpl/overlay/regular_file.go
index 25c785fd4..d791c06db 100644
--- a/pkg/sentry/fsimpl/overlay/regular_file.go
+++ b/pkg/sentry/fsimpl/overlay/regular_file.go
@@ -205,6 +205,20 @@ func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) e
if err := wrappedFD.SetStat(ctx, opts); err != nil {
return err
}
+
+ // Changing owners may clear one or both of the setuid and setgid bits,
+ // so we may have to update opts before setting d.mode.
+ if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 {
+ stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{
+ Mask: linux.STATX_MODE,
+ })
+ if err != nil {
+ return err
+ }
+ opts.Stat.Mode = stat.Mode
+ opts.Stat.Mask |= linux.STATX_MODE
+ }
+
d.updateAfterSetStatLocked(&opts)
if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
@@ -295,7 +309,11 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
return 0, err
}
defer wrappedFD.DecRef(ctx)
- return wrappedFD.PWrite(ctx, src, offset, opts)
+ n, err := wrappedFD.PWrite(ctx, src, offset, opts)
+ if err != nil {
+ return n, err
+ }
+ return fd.updateSetUserGroupIDs(ctx, wrappedFD, n)
}
// Write implements vfs.FileDescriptionImpl.Write.
@@ -307,7 +325,28 @@ func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts
if err != nil {
return 0, err
}
- return wrappedFD.Write(ctx, src, opts)
+ n, err := wrappedFD.Write(ctx, src, opts)
+ if err != nil {
+ return n, err
+ }
+ return fd.updateSetUserGroupIDs(ctx, wrappedFD, n)
+}
+
+func (fd *regularFileFD) updateSetUserGroupIDs(ctx context.Context, wrappedFD *vfs.FileDescription, written int64) (int64, error) {
+ // Writing can clear the setuid and/or setgid bits. We only have to
+ // check this if something was written and one of those bits was set.
+ dentry := fd.dentry()
+ if written == 0 || atomic.LoadUint32(&dentry.mode)&(linux.S_ISUID|linux.S_ISGID) == 0 {
+ return written, nil
+ }
+ stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_MODE})
+ if err != nil {
+ return written, err
+ }
+ dentry.copyMu.Lock()
+ defer dentry.copyMu.Unlock()
+ atomic.StoreUint32(&dentry.mode, uint32(stat.Mode))
+ return written, nil
}
// Seek implements vfs.FileDescriptionImpl.Seek.
diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go
index 609ad3941..7aea3dcd8 100644
--- a/pkg/sentry/kernel/ptrace_amd64.go
+++ b/pkg/sentry/kernel/ptrace_amd64.go
@@ -51,14 +51,15 @@ func (t *Task) ptraceArch(target *Task, req int64, addr, data usermem.Addr) erro
return err
case linux.PTRACE_GETFPREGS:
- _, err := target.Arch().PtraceGetFPRegs(&usermem.IOReadWriter{
+ s := target.Arch().FloatingPointData()
+ _, err := target.Arch().FloatingPointData().PtraceGetFPRegs(&usermem.IOReadWriter{
Ctx: t,
IO: t.MemoryManager(),
Addr: data,
Opts: usermem.IOOpts{
AddressSpaceActive: true,
},
- })
+ }, len(*s))
return err
case linux.PTRACE_SETREGS:
@@ -73,14 +74,15 @@ func (t *Task) ptraceArch(target *Task, req int64, addr, data usermem.Addr) erro
return err
case linux.PTRACE_SETFPREGS:
- _, err := target.Arch().PtraceSetFPRegs(&usermem.IOReadWriter{
+ s := target.Arch().FloatingPointData()
+ _, err := s.PtraceSetFPRegs(&usermem.IOReadWriter{
Ctx: t,
IO: t.MemoryManager(),
Addr: data,
Opts: usermem.IOOpts{
AddressSpaceActive: true,
},
- })
+ }, len(*s))
return err
default:
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index 4f9e781af..03a76eb9b 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -50,6 +50,7 @@ go_library(
"//pkg/safecopy",
"//pkg/seccomp",
"//pkg/sentry/arch",
+ "//pkg/sentry/arch/fpu",
"//pkg/sentry/memmap",
"//pkg/sentry/platform",
"//pkg/sentry/platform/interrupt",
@@ -78,6 +79,7 @@ go_test(
"//pkg/ring0",
"//pkg/ring0/pagetables",
"//pkg/sentry/arch",
+ "//pkg/sentry/arch/fpu",
"//pkg/sentry/platform",
"//pkg/sentry/platform/kvm/testutil",
"//pkg/sentry/time",
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go
index 308696efe..d761bbdee 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64.go
@@ -73,7 +73,7 @@ func (c *vCPU) KernelSyscall() {
// We only trigger a bluepill entry in the bluepill function, and can
// therefore be guaranteed that there is no floating point state to be
// loaded on resuming from halt. We only worry about saving on exit.
- ring0.SaveFloatingPoint(&c.floatingPointState[0]) // escapes: no.
+ ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no.
ring0.Halt()
ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no, reload host segment.
}
@@ -92,7 +92,7 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
regs.Rip = 0
}
// See above.
- ring0.SaveFloatingPoint(&c.floatingPointState[0]) // escapes: no.
+ ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no.
ring0.Halt()
ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no; reload host segment.
}
@@ -124,5 +124,5 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
// Set the context pointer to the saved floating point state. This is
// where the guest data has been serialized, the kernel will restore
// from this new pointer value.
- context.Fpstate = uint64(uintptrValue(&c.floatingPointState[0]))
+ context.Fpstate = uint64(uintptrValue(c.floatingPointState.BytePointer()))
}
diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go
index c317f1e99..578852c3f 100644
--- a/pkg/sentry/platform/kvm/bluepill_arm64.go
+++ b/pkg/sentry/platform/kvm/bluepill_arm64.go
@@ -92,7 +92,7 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
lazyVfp := c.GetLazyVFP()
if lazyVfp != 0 {
- fpsimd := fpsimdPtr(&c.floatingPointState[0])
+ fpsimd := fpsimdPtr(c.floatingPointState.BytePointer())
context.Fpsimd64.Fpsr = fpsimd.Fpsr
context.Fpsimd64.Fpcr = fpsimd.Fpcr
context.Fpsimd64.Vregs = fpsimd.Vregs
@@ -112,12 +112,12 @@ func (c *vCPU) KernelSyscall() {
fpDisableTrap := ring0.CPACREL1()
if fpDisableTrap != 0 {
- fpsimd := fpsimdPtr(&c.floatingPointState[0])
+ fpsimd := fpsimdPtr(c.floatingPointState.BytePointer())
fpcr := ring0.GetFPCR()
fpsr := ring0.GetFPSR()
fpsimd.Fpcr = uint32(fpcr)
fpsimd.Fpsr = uint32(fpsr)
- ring0.SaveVRegs(&c.floatingPointState[0])
+ ring0.SaveVRegs(c.floatingPointState.BytePointer())
}
ring0.Halt()
@@ -136,12 +136,12 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
fpDisableTrap := ring0.CPACREL1()
if fpDisableTrap != 0 {
- fpsimd := fpsimdPtr(&c.floatingPointState[0])
+ fpsimd := fpsimdPtr(c.floatingPointState.BytePointer())
fpcr := ring0.GetFPCR()
fpsr := ring0.GetFPSR()
fpsimd.Fpcr = uint32(fpcr)
fpsimd.Fpsr = uint32(fpsr)
- ring0.SaveVRegs(&c.floatingPointState[0])
+ ring0.SaveVRegs(c.floatingPointState.BytePointer())
}
ring0.Halt()
diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.go b/pkg/sentry/platform/kvm/kvm_amd64_test.go
index 76fc594a0..e44e995a0 100644
--- a/pkg/sentry/platform/kvm/kvm_amd64_test.go
+++ b/pkg/sentry/platform/kvm/kvm_amd64_test.go
@@ -33,7 +33,7 @@ func TestSegments(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
FullRestore: true,
}, &si); err == platform.ErrContextInterrupt {
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index 6243b9a04..5bce16dde 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -25,13 +25,14 @@ import (
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/usermem"
)
-var dummyFPState = (*byte)(arch.NewFloatingPointData())
+var dummyFPState = fpu.NewState()
type testHarness interface {
Errorf(format string, args ...interface{})
@@ -159,7 +160,7 @@ func TestApplicationSyscall(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
FullRestore: true,
}, &si); err == platform.ErrContextInterrupt {
@@ -173,7 +174,7 @@ func TestApplicationSyscall(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
}, &si); err == platform.ErrContextInterrupt {
return true // Retry.
@@ -190,7 +191,7 @@ func TestApplicationFault(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
FullRestore: true,
}, &si); err == platform.ErrContextInterrupt {
@@ -205,7 +206,7 @@ func TestApplicationFault(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
}, &si); err == platform.ErrContextInterrupt {
return true // Retry.
@@ -223,7 +224,7 @@ func TestRegistersSyscall(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
}, &si); err == platform.ErrContextInterrupt {
continue // Retry.
@@ -246,7 +247,7 @@ func TestRegistersFault(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
FullRestore: true,
}, &si); err == platform.ErrContextInterrupt {
@@ -272,7 +273,7 @@ func TestBounce(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
}, &si); err != platform.ErrContextInterrupt {
t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt)
@@ -287,7 +288,7 @@ func TestBounce(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
FullRestore: true,
}, &si); err != platform.ErrContextInterrupt {
@@ -319,7 +320,7 @@ func TestBounceStress(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
}, &si); err != platform.ErrContextInterrupt {
t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt)
@@ -340,7 +341,7 @@ func TestInvalidate(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
}, &si); err == platform.ErrContextInterrupt {
continue // Retry.
@@ -355,7 +356,7 @@ func TestInvalidate(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
Flush: true,
}, &si); err == platform.ErrContextInterrupt {
@@ -379,7 +380,7 @@ func TestEmptyAddressSpace(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
}, &si); err == platform.ErrContextInterrupt {
return true // Retry.
@@ -393,7 +394,7 @@ func TestEmptyAddressSpace(t *testing.T) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
FullRestore: true,
}, &si); err == platform.ErrContextInterrupt {
@@ -469,7 +470,7 @@ func BenchmarkApplicationSyscall(b *testing.B) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
}, &si); err == platform.ErrContextInterrupt {
a++
@@ -506,7 +507,7 @@ func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
- FloatingPointState: dummyFPState,
+ FloatingPointState: &dummyFPState,
PageTables: pt,
}, &si); err == platform.ErrContextInterrupt {
a++
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 916903881..3af96c7e5 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -27,6 +27,7 @@ import (
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/platform"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/usermem"
@@ -70,7 +71,7 @@ type vCPUArchState struct {
// floatingPointState is the floating point state buffer used in guest
// to host transitions. See usage in bluepill_amd64.go.
- floatingPointState arch.FloatingPointData
+ floatingPointState fpu.State
}
const (
@@ -151,7 +152,7 @@ func (c *vCPU) initArchState() error {
// This will be saved prior to leaving the guest, and we restore from
// this always. We cannot use the pointer in the context alone because
// we don't know how large the area there is in reality.
- c.floatingPointState = arch.NewFloatingPointData()
+ c.floatingPointState = fpu.NewState()
// Set the time offset to the host native time.
return c.setSystemTime()
@@ -307,12 +308,12 @@ func loadByte(ptr *byte) byte {
// emulate instructions like xsave and xrstor.
//
//go:nosplit
-func prefaultFloatingPointState(data arch.FloatingPointData) {
- size := len(data)
+func prefaultFloatingPointState(data *fpu.State) {
+ size := len(*data)
for i := 0; i < size; i += usermem.PageSize {
- loadByte(&(data)[i])
+ loadByte(&(*data)[i])
}
- loadByte(&(data)[size-1])
+ loadByte(&(*data)[size-1])
}
// SwitchToUser unpacks architectural-details.
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index 3d715e570..2edc9d1b2 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -32,7 +33,7 @@ type vCPUArchState struct {
// floatingPointState is the floating point state buffer used in guest
// to host transitions. See usage in bluepill_arm64.go.
- floatingPointState arch.FloatingPointData
+ floatingPointState fpu.State
}
const (
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index 059aa43d0..e7d5f3193 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -26,6 +26,7 @@ import (
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -150,7 +151,7 @@ func (c *vCPU) initArchState() error {
c.PCIDs = pagetables.NewPCIDs(fixedKernelPCID+1, poolPCIDs)
}
- c.floatingPointState = arch.NewFloatingPointData()
+ c.floatingPointState = fpu.NewState()
return c.setSystemTime()
}
diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD
index fc43cc3c0..47efde6a2 100644
--- a/pkg/sentry/platform/ptrace/BUILD
+++ b/pkg/sentry/platform/ptrace/BUILD
@@ -30,6 +30,7 @@ go_library(
"//pkg/safecopy",
"//pkg/seccomp",
"//pkg/sentry/arch",
+ "//pkg/sentry/arch/fpu",
"//pkg/sentry/memmap",
"//pkg/sentry/platform",
"//pkg/sentry/platform/interrupt",
diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
index 6259350ec..01e73b019 100644
--- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go
+++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
@@ -20,6 +20,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -62,9 +63,9 @@ func (t *thread) setRegs(regs *arch.Registers) error {
}
// getFPRegs gets the floating-point data via the GETREGSET ptrace unix.
-func (t *thread) getFPRegs(fpState arch.FloatingPointData, fpLen uint64, useXsave bool) error {
+func (t *thread) getFPRegs(fpState *fpu.State, fpLen uint64, useXsave bool) error {
iovec := unix.Iovec{
- Base: (*byte)(&fpState[0]),
+ Base: fpState.BytePointer(),
Len: fpLen,
}
_, _, errno := unix.RawSyscall6(
@@ -81,9 +82,9 @@ func (t *thread) getFPRegs(fpState arch.FloatingPointData, fpLen uint64, useXsav
}
// setFPRegs sets the floating-point data via the SETREGSET ptrace unix.
-func (t *thread) setFPRegs(fpState arch.FloatingPointData, fpLen uint64, useXsave bool) error {
+func (t *thread) setFPRegs(fpState *fpu.State, fpLen uint64, useXsave bool) error {
iovec := unix.Iovec{
- Base: (*byte)(&fpState[0]),
+ Base: fpState.BytePointer(),
Len: fpLen,
}
_, _, errno := unix.RawSyscall6(
diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go
index 5bd526b73..efec93f73 100644
--- a/pkg/sentry/syscalls/linux/error.go
+++ b/pkg/sentry/syscalls/linux/error.go
@@ -75,17 +75,25 @@ func handleIOError(ctx context.Context, partialResult bool, ioerr, intr error, o
// errors, we may consume the error and return only the partial read/write.
//
// Returns false if error is unknown.
-func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, op string) (bool, error) {
- switch err {
- case nil:
+func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr error, op string) (bool, error) {
+ if errOrig == nil {
// Typical successful syscall.
return true, nil
+ }
+
+ // Translate error, if possible, to consolidate errors from other packages
+ // into a smaller set of errors from syserror package.
+ translatedErr := errOrig
+ if errno, ok := syserror.TranslateError(errOrig); ok {
+ translatedErr = errno
+ }
+ switch translatedErr {
case io.EOF:
// EOF is always consumed. If this is a partial read/write
// (result != 0), the application will see that, otherwise
// they will see 0.
return true, nil
- case syserror.ErrExceedsFileSizeLimit:
+ case syserror.EFBIG:
t := kernel.TaskFromContext(ctx)
if t == nil {
panic("I/O error should only occur from a context associated with a Task")
@@ -98,7 +106,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error,
// Simultaneously send a SIGXFSZ per setrlimit(2).
t.SendSignal(kernel.SignalInfoNoInfo(linux.SIGXFSZ, t, t))
return true, syserror.EFBIG
- case syserror.ErrInterrupted:
+ case syserror.EINTR:
// The syscall was interrupted. Return nil if it completed
// partially, otherwise return the error code that the syscall
// needs (to indicate to the kernel what it should do).
@@ -110,10 +118,10 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error,
if !partialResult {
// Typical syscall error.
- return true, err
+ return true, errOrig
}
- switch err {
+ switch translatedErr {
case syserror.EINTR:
// Syscall interrupted, but completed a partial
// read/write. Like ErrWouldBlock, since we have a
@@ -143,7 +151,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error,
// For TCP sendfile connections, we may have a reset or timeout. But we
// should just return n as the result.
return true, nil
- case syserror.ErrWouldBlock:
+ case syserror.EWOULDBLOCK:
// Syscall would block, but completed a partial read/write.
// This case should only be returned by IssueIO for nonblocking
// files. Since we have a partial read/write, we consume
@@ -151,7 +159,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error,
return true, nil
}
- switch err.(type) {
+ switch errOrig.(type) {
case syserror.SyscallRestartErrno:
// Identical to the EINTR case.
return true, nil