diff options
-rw-r--r-- | pkg/sentry/arch/BUILD | 6 | ||||
-rw-r--r-- | pkg/sentry/arch/arch_aarch64.go | 293 | ||||
-rw-r--r-- | pkg/sentry/arch/arch_arm64.go | 266 | ||||
-rw-r--r-- | pkg/sentry/arch/arch_state_aarch64.go | 38 | ||||
-rw-r--r-- | pkg/sentry/arch/arch_state_x86.go | 2 | ||||
-rw-r--r-- | pkg/sentry/arch/registers.proto | 37 | ||||
-rw-r--r-- | pkg/sentry/arch/signal.go | 250 | ||||
-rw-r--r-- | pkg/sentry/arch/signal_amd64.go | 230 | ||||
-rw-r--r-- | pkg/sentry/arch/signal_arm64.go | 126 | ||||
-rw-r--r-- | pkg/sentry/arch/signal_stack.go | 2 | ||||
-rw-r--r-- | pkg/sentry/arch/syscalls_arm64.go | 62 |
11 files changed, 1081 insertions, 231 deletions
diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index ae3e364cd..65f22af2b 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -9,17 +9,23 @@ go_library( srcs = [ "aligned.go", "arch.go", + "arch_aarch64.go", "arch_amd64.go", "arch_amd64.s", + "arch_arm64.go", + "arch_state_aarch64.go", "arch_state_x86.go", "arch_x86.go", "auxv.go", + "signal.go", "signal_act.go", "signal_amd64.go", + "signal_arm64.go", "signal_info.go", "signal_stack.go", "stack.go", "syscalls_amd64.go", + "syscalls_arm64.go", ], importpath = "gvisor.dev/gvisor/pkg/sentry/arch", visibility = ["//:sandbox"], diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go new file mode 100644 index 000000000..ea4dedbdf --- /dev/null +++ b/pkg/sentry/arch/arch_aarch64.go @@ -0,0 +1,293 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package arch + +import ( + "fmt" + "io" + "syscall" + + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/log" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" +) + +const ( + // SyscallWidth is the width of insturctions. + SyscallWidth = 4 +) + +// aarch64FPState is aarch64 floating point state. +type aarch64FPState []byte + +// initAarch64FPState (defined in asm files) sets up initial state. +func initAarch64FPState(data *FloatingPointData) { + // TODO(gvisor.dev/issue/1238): floating-point is not supported. +} + +func newAarch64FPStateSlice() []byte { + return alignedBytes(4096, 32)[:4096] +} + +// newAarch64FPState returns an initialized floating point state. +// +// The returned state is large enough to store all floating point state +// supported by host, even if the app won't use much of it due to a restricted +// FeatureSet. Since they may still be able to see state not advertised by +// CPUID we must ensure it does not contain any sentry state. +func newAarch64FPState() aarch64FPState { + f := aarch64FPState(newAarch64FPStateSlice()) + initAarch64FPState(f.FloatingPointData()) + return f +} + +// fork creates and returns an identical copy of the aarch64 floating point state. +func (f aarch64FPState) fork() aarch64FPState { + n := aarch64FPState(newAarch64FPStateSlice()) + copy(n, f) + return n +} + +// FloatingPointData returns the raw data pointer. +func (f aarch64FPState) FloatingPointData() *FloatingPointData { + return (*FloatingPointData)(&f[0]) +} + +// NewFloatingPointData returns a new floating point data blob. +// +// This is primarily for use in tests. +func NewFloatingPointData() *FloatingPointData { + return (*FloatingPointData)(&(newAarch64FPState()[0])) +} + +// State contains the common architecture bits for aarch64 (the build tag of this +// file ensures it's only built on aarch64). +type State struct { + // The system registers. + Regs syscall.PtraceRegs `state:".(syscallPtraceRegs)"` + + // Our floating point state. + aarch64FPState `state:"wait"` + + // FeatureSet is a pointer to the currently active feature set. + FeatureSet *cpuid.FeatureSet +} + +// Proto returns a protobuf representation of the system registers in State. +func (s State) Proto() *rpb.Registers { + regs := &rpb.ARM64Registers{ + R0: s.Regs.Regs[0], + R1: s.Regs.Regs[1], + R2: s.Regs.Regs[2], + R3: s.Regs.Regs[3], + R4: s.Regs.Regs[4], + R5: s.Regs.Regs[5], + R6: s.Regs.Regs[6], + R7: s.Regs.Regs[7], + R8: s.Regs.Regs[8], + R9: s.Regs.Regs[9], + R10: s.Regs.Regs[10], + R11: s.Regs.Regs[11], + R12: s.Regs.Regs[12], + R13: s.Regs.Regs[13], + R14: s.Regs.Regs[14], + R15: s.Regs.Regs[15], + R16: s.Regs.Regs[16], + R17: s.Regs.Regs[17], + R18: s.Regs.Regs[18], + R19: s.Regs.Regs[19], + R20: s.Regs.Regs[20], + R21: s.Regs.Regs[21], + R22: s.Regs.Regs[22], + R23: s.Regs.Regs[23], + R24: s.Regs.Regs[24], + R25: s.Regs.Regs[25], + R26: s.Regs.Regs[26], + R27: s.Regs.Regs[27], + R28: s.Regs.Regs[28], + R29: s.Regs.Regs[29], + R30: s.Regs.Regs[30], + Sp: s.Regs.Sp, + Pc: s.Regs.Pc, + Pstate: s.Regs.Pstate, + } + return &rpb.Registers{Arch: &rpb.Registers_Arm64{Arm64: regs}} +} + +// Fork creates and returns an identical copy of the state. +func (s *State) Fork() State { + // TODO(gvisor.dev/issue/1238): floating-point is not supported. + return State{ + Regs: s.Regs, + FeatureSet: s.FeatureSet, + } +} + +// StateData implements Context.StateData. +func (s *State) StateData() *State { + return s +} + +// CPUIDEmulate emulates a cpuid instruction. +func (s *State) CPUIDEmulate(l log.Logger) { + // TODO(gvisor.dev/issue/1255): cpuid is not supported. +} + +// SingleStep implements Context.SingleStep. +func (s *State) SingleStep() bool { + return false +} + +// SetSingleStep enables single stepping. +func (s *State) SetSingleStep() { + // Set the trap flag. + // TODO(gvisor.dev/issue/1239): ptrace single-step is not supported. +} + +// ClearSingleStep enables single stepping. +func (s *State) ClearSingleStep() { + // Clear the trap flag. + // TODO(gvisor.dev/issue/1239): ptrace single-step is not supported. +} + +// RegisterMap returns a map of all registers. +func (s *State) RegisterMap() (map[string]uintptr, error) { + return map[string]uintptr{ + "R0": uintptr(s.Regs.Regs[0]), + "R1": uintptr(s.Regs.Regs[1]), + "R2": uintptr(s.Regs.Regs[2]), + "R3": uintptr(s.Regs.Regs[3]), + "R4": uintptr(s.Regs.Regs[4]), + "R5": uintptr(s.Regs.Regs[5]), + "R6": uintptr(s.Regs.Regs[6]), + "R7": uintptr(s.Regs.Regs[7]), + "R8": uintptr(s.Regs.Regs[8]), + "R9": uintptr(s.Regs.Regs[9]), + "R10": uintptr(s.Regs.Regs[10]), + "R11": uintptr(s.Regs.Regs[11]), + "R12": uintptr(s.Regs.Regs[12]), + "R13": uintptr(s.Regs.Regs[13]), + "R14": uintptr(s.Regs.Regs[14]), + "R15": uintptr(s.Regs.Regs[15]), + "R16": uintptr(s.Regs.Regs[16]), + "R17": uintptr(s.Regs.Regs[17]), + "R18": uintptr(s.Regs.Regs[18]), + "R19": uintptr(s.Regs.Regs[19]), + "R20": uintptr(s.Regs.Regs[20]), + "R21": uintptr(s.Regs.Regs[21]), + "R22": uintptr(s.Regs.Regs[22]), + "R23": uintptr(s.Regs.Regs[23]), + "R24": uintptr(s.Regs.Regs[24]), + "R25": uintptr(s.Regs.Regs[25]), + "R26": uintptr(s.Regs.Regs[26]), + "R27": uintptr(s.Regs.Regs[27]), + "R28": uintptr(s.Regs.Regs[28]), + "R29": uintptr(s.Regs.Regs[29]), + "R30": uintptr(s.Regs.Regs[30]), + "Sp": uintptr(s.Regs.Sp), + "Pc": uintptr(s.Regs.Pc), + "Pstate": uintptr(s.Regs.Pstate), + }, nil +} + +// PtraceGetRegs implements Context.PtraceGetRegs. +func (s *State) PtraceGetRegs(dst io.Writer) (int, error) { + return dst.Write(binary.Marshal(nil, usermem.ByteOrder, s.ptraceGetRegs())) +} + +func (s *State) ptraceGetRegs() syscall.PtraceRegs { + return s.Regs +} + +var ptraceRegsSize = int(binary.Size(syscall.PtraceRegs{})) + +// PtraceSetRegs implements Context.PtraceSetRegs. +func (s *State) PtraceSetRegs(src io.Reader) (int, error) { + var regs syscall.PtraceRegs + buf := make([]byte, ptraceRegsSize) + if _, err := io.ReadFull(src, buf); err != nil { + return 0, err + } + binary.Unmarshal(buf, usermem.ByteOrder, ®s) + s.Regs = regs + return ptraceRegsSize, nil +} + +// PtraceGetFPRegs implements Context.PtraceGetFPRegs. +func (s *State) PtraceGetFPRegs(dst io.Writer) (int, error) { + // TODO(gvisor.dev/issue/1238): floating-point is not supported. + return 0, nil +} + +// PtraceSetFPRegs implements Context.PtraceSetFPRegs. +func (s *State) PtraceSetFPRegs(src io.Reader) (int, error) { + // TODO(gvisor.dev/issue/1238): floating-point is not supported. + return 0, nil +} + +// Register sets defined in include/uapi/linux/elf.h. +const ( + _NT_PRSTATUS = 1 + _NT_PRFPREG = 2 +) + +// PtraceGetRegSet implements Context.PtraceGetRegSet. +func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, error) { + switch regset { + case _NT_PRSTATUS: + if maxlen < ptraceRegsSize { + return 0, syserror.EFAULT + } + return s.PtraceGetRegs(dst) + default: + return 0, syserror.EINVAL + } +} + +// PtraceSetRegSet implements Context.PtraceSetRegSet. +func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int, error) { + switch regset { + case _NT_PRSTATUS: + if maxlen < ptraceRegsSize { + return 0, syserror.EFAULT + } + return s.PtraceSetRegs(src) + default: + return 0, syserror.EINVAL + } +} + +// FullRestore indicates whether a full restore is required. +func (s *State) FullRestore() bool { + return false +} + +// New returns a new architecture context. +func New(arch Arch, fs *cpuid.FeatureSet) Context { + switch arch { + case ARM64: + return &context64{ + State{ + FeatureSet: fs, + }, + } + } + panic(fmt.Sprintf("unknown architecture %v", arch)) +} diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go new file mode 100644 index 000000000..0d5b7d317 --- /dev/null +++ b/pkg/sentry/arch/arch_arm64.go @@ -0,0 +1,266 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package arch + +import ( + "fmt" + "math/rand" + "syscall" + + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usermem" +) + +// Host specifies the host architecture. +const Host = ARM64 + +// These constants come directly from Linux. +const ( + // maxAddr64 is the maximum userspace address. It is TASK_SIZE in Linux + // for a 64-bit process. + maxAddr64 usermem.Addr = (1 << 48) + + // maxStackRand64 is the maximum randomization to apply to the stack. + // It is defined by arch/arm64/mm/mmap.c:(STACK_RND_MASK << PAGE_SHIFT) in Linux. + maxStackRand64 = 0x3ffff << 12 // 16 GB + + // maxMmapRand64 is the maximum randomization to apply to the mmap + // layout. It is defined by arch/arm64/mm/mmap.c:arch_mmap_rnd in Linux. + maxMmapRand64 = (1 << 33) * usermem.PageSize + + // minGap64 is the minimum gap to leave at the top of the address space + // for the stack. It is defined by arch/arm64/mm/mmap.c:MIN_GAP in Linux. + minGap64 = (128 << 20) + maxStackRand64 + + // preferredPIELoadAddr is the standard Linux position-independent + // executable base load address. It is ELF_ET_DYN_BASE in Linux. + // + // The Platform {Min,Max}UserAddress() may preclude loading at this + // address. See other preferredFoo comments below. + preferredPIELoadAddr usermem.Addr = maxAddr64 / 6 * 5 +) + +// These constants are selected as heuristics to help make the Platform's +// potentially limited address space conform as closely to Linux as possible. +const ( + preferredTopDownAllocMin usermem.Addr = 0x7e8000000000 + preferredAllocationGap = 128 << 30 // 128 GB + preferredTopDownBaseMin = preferredTopDownAllocMin + preferredAllocationGap + + // minMmapRand64 is the smallest we are willing to make the + // randomization to stay above preferredTopDownBaseMin. + minMmapRand64 = (1 << 18) * usermem.PageSize +) + +// context64 represents an ARM64 context. +type context64 struct { + State +} + +// Arch implements Context.Arch. +func (c *context64) Arch() Arch { + return ARM64 +} + +// Fork returns an exact copy of this context. +func (c *context64) Fork() Context { + return &context64{ + State: c.State.Fork(), + } +} + +// General purpose registers usage on Arm64: +// R0...R7: parameter/result registers. +// R8: indirect result location register. +// R9...R15: temporary rgisters. +// R16: the first intra-procedure-call scratch register. +// R17: the second intra-procedure-call scratch register. +// R18: the platform register. +// R19...R28: callee-saved registers. +// R29: the frame pointer. +// R30: the link register. + +// Return returns the current syscall return value. +func (c *context64) Return() uintptr { + return uintptr(c.Regs.Regs[0]) +} + +// SetReturn sets the syscall return value. +func (c *context64) SetReturn(value uintptr) { + c.Regs.Regs[0] = uint64(value) +} + +// IP returns the current instruction pointer. +func (c *context64) IP() uintptr { + return uintptr(c.Regs.Pc) +} + +// SetIP sets the current instruction pointer. +func (c *context64) SetIP(value uintptr) { + c.Regs.Pc = uint64(value) +} + +// Stack returns the current stack pointer. +func (c *context64) Stack() uintptr { + return uintptr(c.Regs.Sp) +} + +// SetStack sets the current stack pointer. +func (c *context64) SetStack(value uintptr) { + c.Regs.Sp = uint64(value) +} + +// TLS returns the current TLS pointer. +func (c *context64) TLS() uintptr { + // TODO(gvisor.dev/issue/1238): TLS is not supported. + // MRS_TPIDR_EL0 + return 0 +} + +// SetTLS sets the current TLS pointer. Returns false if value is invalid. +func (c *context64) SetTLS(value uintptr) bool { + // TODO(gvisor.dev/issue/1238): TLS is not supported. + // MSR_TPIDR_EL0 + return false +} + +// SetRSEQInterruptedIP implements Context.SetRSEQInterruptedIP. +func (c *context64) SetRSEQInterruptedIP(value uintptr) { + c.Regs.Regs[3] = uint64(value) +} + +// Native returns the native type for the given val. +func (c *context64) Native(val uintptr) interface{} { + v := uint64(val) + return &v +} + +// Value returns the generic val for the given native type. +func (c *context64) Value(val interface{}) uintptr { + return uintptr(*val.(*uint64)) +} + +// Width returns the byte width of this architecture. +func (c *context64) Width() uint { + return 8 +} + +// FeatureSet returns the FeatureSet in use. +func (c *context64) FeatureSet() *cpuid.FeatureSet { + return c.State.FeatureSet +} + +// mmapRand returns a random adjustment for randomizing an mmap layout. +func mmapRand(max uint64) usermem.Addr { + return usermem.Addr(rand.Int63n(int64(max))).RoundDown() +} + +// NewMmapLayout implements Context.NewMmapLayout consistently with Linux. +func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (MmapLayout, error) { + min, ok := min.RoundUp() + if !ok { + return MmapLayout{}, syscall.EINVAL + } + if max > maxAddr64 { + max = maxAddr64 + } + max = max.RoundDown() + + if min > max { + return MmapLayout{}, syscall.EINVAL + } + + stackSize := r.Get(limits.Stack) + + // MAX_GAP in Linux. + maxGap := (max / 6) * 5 + gap := usermem.Addr(stackSize.Cur) + if gap < minGap64 { + gap = minGap64 + } + if gap > maxGap { + gap = maxGap + } + defaultDir := MmapTopDown + if stackSize.Cur == limits.Infinity { + defaultDir = MmapBottomUp + } + + topDownMin := max - gap - maxMmapRand64 + maxRand := usermem.Addr(maxMmapRand64) + if topDownMin < preferredTopDownBaseMin { + // Try to keep TopDownBase above preferredTopDownBaseMin by + // shrinking maxRand. + maxAdjust := maxRand - minMmapRand64 + needAdjust := preferredTopDownBaseMin - topDownMin + if needAdjust <= maxAdjust { + maxRand -= needAdjust + } + } + + rnd := mmapRand(uint64(maxRand)) + l := MmapLayout{ + MinAddr: min, + MaxAddr: max, + // TASK_UNMAPPED_BASE in Linux. + BottomUpBase: (max/3 + rnd).RoundDown(), + TopDownBase: (max - gap - rnd).RoundDown(), + DefaultDirection: defaultDir, + // We may have reduced the maximum randomization to keep + // TopDownBase above preferredTopDownBaseMin while maintaining + // our stack gap. Stack allocations must use that max + // randomization to avoiding eating into the gap. + MaxStackRand: uint64(maxRand), + } + + // Final sanity check on the layout. + if !l.Valid() { + panic(fmt.Sprintf("Invalid MmapLayout: %+v", l)) + } + + return l, nil +} + +// PIELoadAddress implements Context.PIELoadAddress. +func (c *context64) PIELoadAddress(l MmapLayout) usermem.Addr { + base := preferredPIELoadAddr + max, ok := base.AddLength(maxMmapRand64) + if !ok { + panic(fmt.Sprintf("preferredPIELoadAddr %#x too large", base)) + } + + if max > l.MaxAddr { + // preferredPIELoadAddr won't fit; fall back to the standard + // Linux behavior of 2/3 of TopDownBase. TSAN won't like this. + // + // Don't bother trying to shrink the randomization for now. + base = l.TopDownBase / 3 * 2 + } + + return base + mmapRand(maxMmapRand64) +} + +// PtracePeekUser implements Context.PtracePeekUser. +func (c *context64) PtracePeekUser(addr uintptr) (interface{}, error) { + // TODO(gvisor.dev/issue/1239): Full ptrace supporting for Arm64. + return c.Native(0), nil +} + +// PtracePokeUser implements Context.PtracePokeUser. +func (c *context64) PtracePokeUser(addr, data uintptr) error { + // TODO(gvisor.dev/issue/1239): Full ptrace supporting for Arm64. + return nil +} diff --git a/pkg/sentry/arch/arch_state_aarch64.go b/pkg/sentry/arch/arch_state_aarch64.go new file mode 100644 index 000000000..0136a85ad --- /dev/null +++ b/pkg/sentry/arch/arch_state_aarch64.go @@ -0,0 +1,38 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package arch + +import ( + "syscall" +) + +type syscallPtraceRegs struct { + Regs [31]uint64 + Sp uint64 + Pc uint64 + Pstate uint64 +} + +// saveRegs is invoked by stateify. +func (s *State) saveRegs() syscallPtraceRegs { + return syscallPtraceRegs(s.Regs) +} + +// loadRegs is invoked by stateify. +func (s *State) loadRegs(r syscallPtraceRegs) { + s.Regs = syscall.PtraceRegs(r) +} diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go index 9061fcc86..84f11b0d1 100644 --- a/pkg/sentry/arch/arch_state_x86.go +++ b/pkg/sentry/arch/arch_state_x86.go @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +// +build amd64 i386 + package arch import ( diff --git a/pkg/sentry/arch/registers.proto b/pkg/sentry/arch/registers.proto index 9dc83e241..60c027aab 100644 --- a/pkg/sentry/arch/registers.proto +++ b/pkg/sentry/arch/registers.proto @@ -48,8 +48,45 @@ message AMD64Registers { uint64 gs_base = 27; } +message ARM64Registers { + uint64 r0 = 1; + uint64 r1 = 2; + uint64 r2 = 3; + uint64 r3 = 4; + uint64 r4 = 5; + uint64 r5 = 6; + uint64 r6 = 7; + uint64 r7 = 8; + uint64 r8 = 9; + uint64 r9 = 10; + uint64 r10 = 11; + uint64 r11 = 12; + uint64 r12 = 13; + uint64 r13 = 14; + uint64 r14 = 15; + uint64 r15 = 16; + uint64 r16 = 17; + uint64 r17 = 18; + uint64 r18 = 19; + uint64 r19 = 20; + uint64 r20 = 21; + uint64 r21 = 22; + uint64 r22 = 23; + uint64 r23 = 24; + uint64 r24 = 25; + uint64 r25 = 26; + uint64 r26 = 27; + uint64 r27 = 28; + uint64 r28 = 29; + uint64 r29 = 30; + uint64 r30 = 31; + uint64 sp = 32; + uint64 pc = 33; + uint64 pstate = 34; +} message Registers { oneof arch { AMD64Registers amd64 = 1; + ARM64Registers arm64 = 2; } } diff --git a/pkg/sentry/arch/signal.go b/pkg/sentry/arch/signal.go new file mode 100644 index 000000000..402e46025 --- /dev/null +++ b/pkg/sentry/arch/signal.go @@ -0,0 +1,250 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package arch + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/usermem" +) + +// SignalAct represents the action that should be taken when a signal is +// delivered, and is equivalent to struct sigaction. +// +// +stateify savable +type SignalAct struct { + Handler uint64 + Flags uint64 + Restorer uint64 // Only used on amd64. + Mask linux.SignalSet +} + +// SerializeFrom implements NativeSignalAct.SerializeFrom. +func (s *SignalAct) SerializeFrom(other *SignalAct) { + *s = *other +} + +// DeserializeTo implements NativeSignalAct.DeserializeTo. +func (s *SignalAct) DeserializeTo(other *SignalAct) { + *other = *s +} + +// SignalStack represents information about a user stack, and is equivalent to +// stack_t. +// +// +stateify savable +type SignalStack struct { + Addr uint64 + Flags uint32 + _ uint32 + Size uint64 +} + +// SerializeFrom implements NativeSignalStack.SerializeFrom. +func (s *SignalStack) SerializeFrom(other *SignalStack) { + *s = *other +} + +// DeserializeTo implements NativeSignalStack.DeserializeTo. +func (s *SignalStack) DeserializeTo(other *SignalStack) { + *other = *s +} + +// SignalInfo represents information about a signal being delivered, and is +// equivalent to struct siginfo in linux kernel(linux/include/uapi/asm-generic/siginfo.h). +// +// +stateify savable +type SignalInfo struct { + Signo int32 // Signal number + Errno int32 // Errno value + Code int32 // Signal code + _ uint32 + + // struct siginfo::_sifields is a union. In SignalInfo, fields in the union + // are accessed through methods. + // + // For reference, here is the definition of _sifields: (_sigfault._trapno, + // which does not exist on x86, omitted for clarity) + // + // union { + // int _pad[SI_PAD_SIZE]; + // + // /* kill() */ + // struct { + // __kernel_pid_t _pid; /* sender's pid */ + // __ARCH_SI_UID_T _uid; /* sender's uid */ + // } _kill; + // + // /* POSIX.1b timers */ + // struct { + // __kernel_timer_t _tid; /* timer id */ + // int _overrun; /* overrun count */ + // char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)]; + // sigval_t _sigval; /* same as below */ + // int _sys_private; /* not to be passed to user */ + // } _timer; + // + // /* POSIX.1b signals */ + // struct { + // __kernel_pid_t _pid; /* sender's pid */ + // __ARCH_SI_UID_T _uid; /* sender's uid */ + // sigval_t _sigval; + // } _rt; + // + // /* SIGCHLD */ + // struct { + // __kernel_pid_t _pid; /* which child */ + // __ARCH_SI_UID_T _uid; /* sender's uid */ + // int _status; /* exit code */ + // __ARCH_SI_CLOCK_T _utime; + // __ARCH_SI_CLOCK_T _stime; + // } _sigchld; + // + // /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + // struct { + // void *_addr; /* faulting insn/memory ref. */ + // short _addr_lsb; /* LSB of the reported address */ + // } _sigfault; + // + // /* SIGPOLL */ + // struct { + // __ARCH_SI_BAND_T _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + // int _fd; + // } _sigpoll; + // + // /* SIGSYS */ + // struct { + // void *_call_addr; /* calling user insn */ + // int _syscall; /* triggering system call number */ + // unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + // } _sigsys; + // } _sifields; + // + // _sifields is padded so that the size of siginfo is SI_MAX_SIZE = 128 + // bytes. + Fields [128 - 16]byte +} + +// FixSignalCodeForUser fixes up si_code. +// +// The si_code we get from Linux may contain the kernel-specific code in the +// top 16 bits if it's positive (e.g., from ptrace). Linux's +// copy_siginfo_to_user does +// err |= __put_user((short)from->si_code, &to->si_code); +// to mask out those bits and we need to do the same. +func (s *SignalInfo) FixSignalCodeForUser() { + if s.Code > 0 { + s.Code &= 0x0000ffff + } +} + +// Pid returns the si_pid field. +func (s *SignalInfo) Pid() int32 { + return int32(usermem.ByteOrder.Uint32(s.Fields[0:4])) +} + +// SetPid mutates the si_pid field. +func (s *SignalInfo) SetPid(val int32) { + usermem.ByteOrder.PutUint32(s.Fields[0:4], uint32(val)) +} + +// Uid returns the si_uid field. +func (s *SignalInfo) Uid() int32 { + return int32(usermem.ByteOrder.Uint32(s.Fields[4:8])) +} + +// SetUid mutates the si_uid field. +func (s *SignalInfo) SetUid(val int32) { + usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val)) +} + +// Sigval returns the sigval field, which is aliased to both si_int and si_ptr. +func (s *SignalInfo) Sigval() uint64 { + return usermem.ByteOrder.Uint64(s.Fields[8:16]) +} + +// SetSigval mutates the sigval field. +func (s *SignalInfo) SetSigval(val uint64) { + usermem.ByteOrder.PutUint64(s.Fields[8:16], val) +} + +// TimerID returns the si_timerid field. +func (s *SignalInfo) TimerID() linux.TimerID { + return linux.TimerID(usermem.ByteOrder.Uint32(s.Fields[0:4])) +} + +// SetTimerID sets the si_timerid field. +func (s *SignalInfo) SetTimerID(val linux.TimerID) { + usermem.ByteOrder.PutUint32(s.Fields[0:4], uint32(val)) +} + +// Overrun returns the si_overrun field. +func (s *SignalInfo) Overrun() int32 { + return int32(usermem.ByteOrder.Uint32(s.Fields[4:8])) +} + +// SetOverrun sets the si_overrun field. +func (s *SignalInfo) SetOverrun(val int32) { + usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val)) +} + +// Addr returns the si_addr field. +func (s *SignalInfo) Addr() uint64 { + return usermem.ByteOrder.Uint64(s.Fields[0:8]) +} + +// SetAddr sets the si_addr field. +func (s *SignalInfo) SetAddr(val uint64) { + usermem.ByteOrder.PutUint64(s.Fields[0:8], val) +} + +// Status returns the si_status field. +func (s *SignalInfo) Status() int32 { + return int32(usermem.ByteOrder.Uint32(s.Fields[8:12])) +} + +// SetStatus mutates the si_status field. +func (s *SignalInfo) SetStatus(val int32) { + usermem.ByteOrder.PutUint32(s.Fields[8:12], uint32(val)) +} + +// CallAddr returns the si_call_addr field. +func (s *SignalInfo) CallAddr() uint64 { + return usermem.ByteOrder.Uint64(s.Fields[0:8]) +} + +// SetCallAddr mutates the si_call_addr field. +func (s *SignalInfo) SetCallAddr(val uint64) { + usermem.ByteOrder.PutUint64(s.Fields[0:8], val) +} + +// Syscall returns the si_syscall field. +func (s *SignalInfo) Syscall() int32 { + return int32(usermem.ByteOrder.Uint32(s.Fields[8:12])) +} + +// SetSyscall mutates the si_syscall field. +func (s *SignalInfo) SetSyscall(val int32) { + usermem.ByteOrder.PutUint32(s.Fields[8:12], uint32(val)) +} + +// Arch returns the si_arch field. +func (s *SignalInfo) Arch() uint32 { + return usermem.ByteOrder.Uint32(s.Fields[12:16]) +} + +// SetArch mutates the si_arch field. +func (s *SignalInfo) SetArch(val uint32) { + usermem.ByteOrder.PutUint32(s.Fields[12:16], val) +} diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index febd6f9b9..1e4f9c3c2 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -26,236 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usermem" ) -// SignalAct represents the action that should be taken when a signal is -// delivered, and is equivalent to struct sigaction on 64-bit x86. -// -// +stateify savable -type SignalAct struct { - Handler uint64 - Flags uint64 - Restorer uint64 - Mask linux.SignalSet -} - -// SerializeFrom implements NativeSignalAct.SerializeFrom. -func (s *SignalAct) SerializeFrom(other *SignalAct) { - *s = *other -} - -// DeserializeTo implements NativeSignalAct.DeserializeTo. -func (s *SignalAct) DeserializeTo(other *SignalAct) { - *other = *s -} - -// SignalStack represents information about a user stack, and is equivalent to -// stack_t on 64-bit x86. -// -// +stateify savable -type SignalStack struct { - Addr uint64 - Flags uint32 - _ uint32 - Size uint64 -} - -// SerializeFrom implements NativeSignalStack.SerializeFrom. -func (s *SignalStack) SerializeFrom(other *SignalStack) { - *s = *other -} - -// DeserializeTo implements NativeSignalStack.DeserializeTo. -func (s *SignalStack) DeserializeTo(other *SignalStack) { - *other = *s -} - -// SignalInfo represents information about a signal being delivered, and is -// equivalent to struct siginfo on 64-bit x86. -// -// +stateify savable -type SignalInfo struct { - Signo int32 // Signal number - Errno int32 // Errno value - Code int32 // Signal code - _ uint32 - - // struct siginfo::_sifields is a union. In SignalInfo, fields in the union - // are accessed through methods. - // - // For reference, here is the definition of _sifields: (_sigfault._trapno, - // which does not exist on x86, omitted for clarity) - // - // union { - // int _pad[SI_PAD_SIZE]; - // - // /* kill() */ - // struct { - // __kernel_pid_t _pid; /* sender's pid */ - // __ARCH_SI_UID_T _uid; /* sender's uid */ - // } _kill; - // - // /* POSIX.1b timers */ - // struct { - // __kernel_timer_t _tid; /* timer id */ - // int _overrun; /* overrun count */ - // char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)]; - // sigval_t _sigval; /* same as below */ - // int _sys_private; /* not to be passed to user */ - // } _timer; - // - // /* POSIX.1b signals */ - // struct { - // __kernel_pid_t _pid; /* sender's pid */ - // __ARCH_SI_UID_T _uid; /* sender's uid */ - // sigval_t _sigval; - // } _rt; - // - // /* SIGCHLD */ - // struct { - // __kernel_pid_t _pid; /* which child */ - // __ARCH_SI_UID_T _uid; /* sender's uid */ - // int _status; /* exit code */ - // __ARCH_SI_CLOCK_T _utime; - // __ARCH_SI_CLOCK_T _stime; - // } _sigchld; - // - // /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - // struct { - // void *_addr; /* faulting insn/memory ref. */ - // short _addr_lsb; /* LSB of the reported address */ - // } _sigfault; - // - // /* SIGPOLL */ - // struct { - // __ARCH_SI_BAND_T _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - // int _fd; - // } _sigpoll; - // - // /* SIGSYS */ - // struct { - // void *_call_addr; /* calling user insn */ - // int _syscall; /* triggering system call number */ - // unsigned int _arch; /* AUDIT_ARCH_* of syscall */ - // } _sigsys; - // } _sifields; - // - // _sifields is padded so that the size of siginfo is SI_MAX_SIZE = 128 - // bytes. - Fields [128 - 16]byte -} - -// FixSignalCodeForUser fixes up si_code. -// -// The si_code we get from Linux may contain the kernel-specific code in the -// top 16 bits if it's positive (e.g., from ptrace). Linux's -// copy_siginfo_to_user does -// err |= __put_user((short)from->si_code, &to->si_code); -// to mask out those bits and we need to do the same. -func (s *SignalInfo) FixSignalCodeForUser() { - if s.Code > 0 { - s.Code &= 0x0000ffff - } -} - -// Pid returns the si_pid field. -func (s *SignalInfo) Pid() int32 { - return int32(usermem.ByteOrder.Uint32(s.Fields[0:4])) -} - -// SetPid mutates the si_pid field. -func (s *SignalInfo) SetPid(val int32) { - usermem.ByteOrder.PutUint32(s.Fields[0:4], uint32(val)) -} - -// Uid returns the si_uid field. -func (s *SignalInfo) Uid() int32 { - return int32(usermem.ByteOrder.Uint32(s.Fields[4:8])) -} - -// SetUid mutates the si_uid field. -func (s *SignalInfo) SetUid(val int32) { - usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val)) -} - -// Sigval returns the sigval field, which is aliased to both si_int and si_ptr. -func (s *SignalInfo) Sigval() uint64 { - return usermem.ByteOrder.Uint64(s.Fields[8:16]) -} - -// SetSigval mutates the sigval field. -func (s *SignalInfo) SetSigval(val uint64) { - usermem.ByteOrder.PutUint64(s.Fields[8:16], val) -} - -// TimerID returns the si_timerid field. -func (s *SignalInfo) TimerID() linux.TimerID { - return linux.TimerID(usermem.ByteOrder.Uint32(s.Fields[0:4])) -} - -// SetTimerID sets the si_timerid field. -func (s *SignalInfo) SetTimerID(val linux.TimerID) { - usermem.ByteOrder.PutUint32(s.Fields[0:4], uint32(val)) -} - -// Overrun returns the si_overrun field. -func (s *SignalInfo) Overrun() int32 { - return int32(usermem.ByteOrder.Uint32(s.Fields[4:8])) -} - -// SetOverrun sets the si_overrun field. -func (s *SignalInfo) SetOverrun(val int32) { - usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val)) -} - -// Addr returns the si_addr field. -func (s *SignalInfo) Addr() uint64 { - return usermem.ByteOrder.Uint64(s.Fields[0:8]) -} - -// SetAddr sets the si_addr field. -func (s *SignalInfo) SetAddr(val uint64) { - usermem.ByteOrder.PutUint64(s.Fields[0:8], val) -} - -// Status returns the si_status field. -func (s *SignalInfo) Status() int32 { - return int32(usermem.ByteOrder.Uint32(s.Fields[8:12])) -} - -// SetStatus mutates the si_status field. -func (s *SignalInfo) SetStatus(val int32) { - usermem.ByteOrder.PutUint32(s.Fields[8:12], uint32(val)) -} - -// CallAddr returns the si_call_addr field. -func (s *SignalInfo) CallAddr() uint64 { - return usermem.ByteOrder.Uint64(s.Fields[0:8]) -} - -// SetCallAddr mutates the si_call_addr field. -func (s *SignalInfo) SetCallAddr(val uint64) { - usermem.ByteOrder.PutUint64(s.Fields[0:8], val) -} - -// Syscall returns the si_syscall field. -func (s *SignalInfo) Syscall() int32 { - return int32(usermem.ByteOrder.Uint32(s.Fields[8:12])) -} - -// SetSyscall mutates the si_syscall field. -func (s *SignalInfo) SetSyscall(val int32) { - usermem.ByteOrder.PutUint32(s.Fields[8:12], uint32(val)) -} - -// Arch returns the si_arch field. -func (s *SignalInfo) Arch() uint32 { - return usermem.ByteOrder.Uint32(s.Fields[12:16]) -} - -// SetArch mutates the si_arch field. -func (s *SignalInfo) SetArch(val uint32) { - usermem.ByteOrder.PutUint32(s.Fields[12:16], val) -} - // SignalContext64 is equivalent to struct sigcontext, the type passed as the // second argument to signal handlers set by signal(2). type SignalContext64 struct { diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go new file mode 100644 index 000000000..7d0e98935 --- /dev/null +++ b/pkg/sentry/arch/signal_arm64.go @@ -0,0 +1,126 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package arch + +import ( + "encoding/binary" + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/usermem" +) + +// SignalContext64 is equivalent to struct sigcontext, the type passed as the +// second argument to signal handlers set by signal(2). +type SignalContext64 struct { + FaultAddr uint64 + Regs [31]uint64 + Sp uint64 + Pc uint64 + Pstate uint64 + _pad [8]byte // __attribute__((__aligned__(16))) + Reserved [4096]uint8 +} + +// UContext64 is equivalent to ucontext on arm64(arch/arm64/include/uapi/asm/ucontext.h). +type UContext64 struct { + Flags uint64 + Link *UContext64 + Stack SignalStack + Sigset linux.SignalSet + // glibc uses a 1024-bit sigset_t + _pad [(1024 - 64) / 8]byte + // sigcontext must be aligned to 16-byte + _pad2 [8]byte + // last for future expansion + MContext SignalContext64 +} + +// NewSignalAct implements Context.NewSignalAct. +func (c *context64) NewSignalAct() NativeSignalAct { + return &SignalAct{} +} + +// NewSignalStack implements Context.NewSignalStack. +func (c *context64) NewSignalStack() NativeSignalStack { + return &SignalStack{} +} + +// SignalSetup implements Context.SignalSetup. +func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt *SignalStack, sigset linux.SignalSet) error { + sp := st.Bottom + + if !(alt.IsEnabled() && sp == alt.Top()) { + sp -= 128 + } + + // Construct the UContext64 now since we need its size. + uc := &UContext64{ + Flags: 0, + Stack: *alt, + MContext: SignalContext64{ + Regs: c.Regs.Regs, + Sp: c.Regs.Sp, + Pc: c.Regs.Pc, + Pstate: c.Regs.Pstate, + }, + Sigset: sigset, + } + + ucSize := binary.Size(uc) + if ucSize < 0 { + panic("can't get size of UContext64") + } + // st.Arch.Width() is for the restorer address. sizeof(siginfo) == 128. + frameSize := int(st.Arch.Width()) + ucSize + 128 + frameBottom := (sp-usermem.Addr(frameSize)) & ^usermem.Addr(15) - 8 + sp = frameBottom + usermem.Addr(frameSize) + st.Bottom = sp + + // Prior to proceeding, figure out if the frame will exhaust the range + // for the signal stack. This is not allowed, and should immediately + // force signal delivery (reverting to the default handler). + if act.IsOnStack() && alt.IsEnabled() && !alt.Contains(frameBottom) { + return syscall.EFAULT + } + + // Adjust the code. + info.FixSignalCodeForUser() + + // Set up the stack frame. + infoAddr, err := st.Push(info) + if err != nil { + return err + } + ucAddr, err := st.Push(uc) + if err != nil { + return err + } + + // Set up registers. + c.Regs.Sp = uint64(st.Bottom) + c.Regs.Pc = act.Handler + c.Regs.Regs[0] = uint64(info.Signo) + c.Regs.Regs[1] = uint64(infoAddr) + c.Regs.Regs[2] = uint64(ucAddr) + + return nil +} + +// SignalRestore implements Context.SignalRestore. +// Only used on intel. +func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalStack, error) { + return 0, SignalStack{}, nil +} diff --git a/pkg/sentry/arch/signal_stack.go b/pkg/sentry/arch/signal_stack.go index 5a3228113..d324da705 100644 --- a/pkg/sentry/arch/signal_stack.go +++ b/pkg/sentry/arch/signal_stack.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build i386 amd64 +// +build i386 amd64 arm64 package arch diff --git a/pkg/sentry/arch/syscalls_arm64.go b/pkg/sentry/arch/syscalls_arm64.go new file mode 100644 index 000000000..00d5ef461 --- /dev/null +++ b/pkg/sentry/arch/syscalls_arm64.go @@ -0,0 +1,62 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package arch + +const restartSyscallNr = uintptr(128) + +// SyscallNo returns the syscall number according to the 64-bit convention. +func (c *context64) SyscallNo() uintptr { + return uintptr(c.Regs.Regs[8]) +} + +// SyscallArgs provides syscall arguments according to the 64-bit convention. +// +// Due to the way addresses are mapped for the sentry this binary *must* be +// built in 64-bit mode. So we can just assume the syscall numbers that come +// back match the expected host system call numbers. +// General purpose registers usage on Arm64: +// R0...R7: parameter/result registers. +// R8: indirect result location register. +// R9...R15: temporary registers. +// R16: the first intra-procedure-call scratch register. +// R17: the second intra-procedure-call scratch register. +// R18: the platform register. +// R19...R28: callee-saved registers. +// R29: the frame pointer. +// R30: the link register. +func (c *context64) SyscallArgs() SyscallArguments { + return SyscallArguments{ + SyscallArgument{Value: uintptr(c.Regs.Regs[0])}, + SyscallArgument{Value: uintptr(c.Regs.Regs[1])}, + SyscallArgument{Value: uintptr(c.Regs.Regs[2])}, + SyscallArgument{Value: uintptr(c.Regs.Regs[3])}, + SyscallArgument{Value: uintptr(c.Regs.Regs[4])}, + SyscallArgument{Value: uintptr(c.Regs.Regs[5])}, + } +} + +// RestartSyscall implements Context.RestartSyscall. +func (c *context64) RestartSyscall() { + c.Regs.Pc -= SyscallWidth + c.Regs.Regs[8] = uint64(restartSyscallNr) +} + +// RestartSyscallWithRestartBlock implements Context.RestartSyscallWithRestartBlock. +func (c *context64) RestartSyscallWithRestartBlock() { + c.Regs.Pc -= SyscallWidth + c.Regs.Regs[8] = uint64(restartSyscallNr) +} |