diff options
Diffstat (limited to 'pkg/sentry/platform')
-rw-r--r-- | pkg/sentry/platform/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/BUILD | 2 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go | 2 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/filters.go | 33 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm.go | 14 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine.go | 9 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_amd64_unsafe.go | 2 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_unsafe.go | 3 | ||||
-rw-r--r-- | pkg/sentry/platform/platform.go | 30 | ||||
-rw-r--r-- | pkg/sentry/platform/ptrace/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/platform/ptrace/filters.go | 33 | ||||
-rw-r--r-- | pkg/sentry/platform/ptrace/ptrace.go | 15 | ||||
-rw-r--r-- | pkg/sentry/platform/ptrace/subprocess.go | 16 | ||||
-rw-r--r-- | pkg/sentry/platform/ptrace/subprocess_linux.go | 5 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/entry_amd64.s | 2 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/kernel_amd64.go | 8 |
16 files changed, 161 insertions, 15 deletions
diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD index 0b9962b2b..9aa6ec507 100644 --- a/pkg/sentry/platform/BUILD +++ b/pkg/sentry/platform/BUILD @@ -28,6 +28,7 @@ go_library( "//pkg/abi/linux", "//pkg/atomicbitops", "//pkg/log", + "//pkg/seccomp", "//pkg/sentry/arch", "//pkg/sentry/context", "//pkg/sentry/platform/safecopy", diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index 9ccf77fdf..ad8b95744 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -14,6 +14,7 @@ go_library( "bluepill_fault.go", "bluepill_unsafe.go", "context.go", + "filters.go", "kvm.go", "kvm_amd64.go", "kvm_amd64_unsafe.go", @@ -33,6 +34,7 @@ go_library( "//pkg/cpuid", "//pkg/log", "//pkg/procid", + "//pkg/seccomp", "//pkg/sentry/arch", "//pkg/sentry/platform", "//pkg/sentry/platform/interrupt", diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go index 0effd33ac..9d8af143e 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go @@ -30,7 +30,7 @@ func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 { return &((*arch.UContext64)(context).MContext) } -// dieArchSetup initialies the state for dieTrampoline. +// dieArchSetup initializes the state for dieTrampoline. // // The amd64 dieTrampoline requires the vCPU to be set in BX, and the last RIP // to be in AX. The trampoline then simulates a call to dieHandler from the diff --git a/pkg/sentry/platform/kvm/filters.go b/pkg/sentry/platform/kvm/filters.go new file mode 100644 index 000000000..7d949f1dd --- /dev/null +++ b/pkg/sentry/platform/kvm/filters.go @@ -0,0 +1,33 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kvm + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/seccomp" +) + +// SyscallFilters returns syscalls made exclusively by the KVM platform. +func (*KVM) SyscallFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_ARCH_PRCTL: {}, + syscall.SYS_IOCTL: {}, + syscall.SYS_MMAP: {}, + syscall.SYS_RT_SIGSUSPEND: {}, + syscall.SYS_RT_SIGTIMEDWAIT: {}, + 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host. + } +} diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index b49d7f3c4..ee4cd2f4d 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -141,3 +141,17 @@ func (k *KVM) NewContext() platform.Context { machine: k.machine, } } + +type constructor struct{} + +func (*constructor) New(f *os.File) (platform.Platform, error) { + return New(f) +} + +func (*constructor) OpenDevice() (*os.File, error) { + return OpenDevice() +} + +func init() { + platform.Register("kvm", &constructor{}) +} diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 69b2f92a7..679087e25 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -135,7 +135,7 @@ type dieState struct { // newVCPU creates a returns a new vCPU. // -// Precondtion: mu must be held. +// Precondition: mu must be held. func (m *machine) newVCPU() *vCPU { id := len(m.vCPUs) @@ -426,7 +426,12 @@ func (c *vCPU) unlock() { // Normal state. 
case vCPUUser | vCPUGuest | vCPUWaiter: // Force a transition: this must trigger a notification when we - // return from guest mode. + // return from guest mode. We must clear vCPUWaiter here + // anyway, because BounceToKernel will force a transition only + // from ring3 to ring0, which will not clear this bit. Halt may + // work around the issue, but if there is no exception or + // syscall in this period, BounceToKernel will hang. + atomicbitops.AndUint32(&c.state, ^vCPUWaiter) c.notify() case vCPUUser | vCPUWaiter: // Waiting for the lock to be released; the responsibility is diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go index c87fa7b7c..506ec9af1 100644 --- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go @@ -87,7 +87,7 @@ func (c *vCPU) setCPUID() error { // setSystemTime sets the TSC for the vCPU. // -// This has to make the call many times in order to minimize the intrinstic +// This has to make the call many times in order to minimize the intrinsic // error in the offset. Unfortunately KVM does not expose a relative offset via // the API, so this is an approximation. We do this via an iterative algorithm. // This has the advantage that it can generally deal with highly variable diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go index 8d76e106e..405e00292 100644 --- a/pkg/sentry/platform/kvm/machine_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_unsafe.go @@ -21,6 +21,7 @@ package kvm import ( "fmt" + "math" "sync/atomic" "syscall" "unsafe" @@ -134,7 +135,7 @@ func (c *vCPU) notify() { syscall.SYS_FUTEX, uintptr(unsafe.Pointer(&c.state)), linux.FUTEX_WAKE|linux.FUTEX_PRIVATE_FLAG, - ^uintptr(0), // Number of waiters. + math.MaxInt32, // Number of waiters. 
0, 0, 0) if errno != 0 { throw("futex wake error") diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go index 5ad98a329..ec22dbf87 100644 --- a/pkg/sentry/platform/platform.go +++ b/pkg/sentry/platform/platform.go @@ -19,8 +19,10 @@ package platform import ( "fmt" + "os" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/seccomp" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usermem" @@ -93,6 +95,9 @@ type Platform interface { // Platforms for which this does not hold may panic if PreemptAllCPUs is // called. PreemptAllCPUs() error + + // SyscallFilters returns syscalls made exclusively by this platform. + SyscallFilters() seccomp.SyscallRules } // NoCPUPreemptionDetection implements Platform.DetectsCPUPreemption and @@ -256,7 +261,7 @@ type AddressSpaceIO interface { LoadUint32(addr usermem.Addr) (uint32, error) } -// NoAddressSpaceIO implements AddressSpaceIO methods by panicing. +// NoAddressSpaceIO implements AddressSpaceIO methods by panicking. type NoAddressSpaceIO struct{} // CopyOut implements AddressSpaceIO.CopyOut. @@ -347,3 +352,26 @@ type File interface { func (fr FileRange) String() string { return fmt.Sprintf("[%#x, %#x)", fr.Start, fr.End) } + +// Constructor represents a platform type. +type Constructor interface { + New(deviceFile *os.File) (Platform, error) + OpenDevice() (*os.File, error) +} + +// platforms contains all available platform types. +var platforms = map[string]Constructor{} + +// Register registers a new platform type. +func Register(name string, platform Constructor) { + platforms[name] = platform +} + +// Lookup looks up the platform constructor by name. 
+func Lookup(name string) (Constructor, error) { + p, ok := platforms[name] + if !ok { + return nil, fmt.Errorf("unknown platform: %v", name) + } + return p, nil +} diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD index 6a1343f47..1b6c54e96 100644 --- a/pkg/sentry/platform/ptrace/BUILD +++ b/pkg/sentry/platform/ptrace/BUILD @@ -5,6 +5,7 @@ package(licenses = ["notice"]) go_library( name = "ptrace", srcs = [ + "filters.go", "ptrace.go", "ptrace_unsafe.go", "stub_amd64.s", diff --git a/pkg/sentry/platform/ptrace/filters.go b/pkg/sentry/platform/ptrace/filters.go new file mode 100644 index 000000000..1e07cfd0d --- /dev/null +++ b/pkg/sentry/platform/ptrace/filters.go @@ -0,0 +1,33 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ptrace + +import ( + "syscall" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/seccomp" +) + +// SyscallFilters returns syscalls made exclusively by the ptrace platform. 
+func (*PTrace) SyscallFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + unix.SYS_GETCPU: {}, + unix.SYS_SCHED_SETAFFINITY: {}, + syscall.SYS_PTRACE: {}, + syscall.SYS_TGKILL: {}, + syscall.SYS_WAIT4: {}, + } +} diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go index ee7e0640c..6fd30ed25 100644 --- a/pkg/sentry/platform/ptrace/ptrace.go +++ b/pkg/sentry/platform/ptrace/ptrace.go @@ -45,6 +45,7 @@ package ptrace import ( + "os" "sync" "gvisor.dev/gvisor/pkg/abi/linux" @@ -236,3 +237,17 @@ func (p *PTrace) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan s func (*PTrace) NewContext() platform.Context { return &context{} } + +type constructor struct{} + +func (*constructor) New(*os.File) (platform.Platform, error) { + return New() +} + +func (*constructor) OpenDevice() (*os.File, error) { + return nil, nil +} + +func init() { + platform.Register("ptrace", &constructor{}) +} diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go index f15b3415a..15e84735e 100644 --- a/pkg/sentry/platform/ptrace/subprocess.go +++ b/pkg/sentry/platform/ptrace/subprocess.go @@ -123,7 +123,7 @@ type subprocess struct { contexts map[*context]struct{} } -// newSubprocess returns a useable subprocess. +// newSubprocess returns a usable subprocess. // // This will either be a newly created subprocess, or one from the global pool. // The create function will be called in the latter case, which is guaranteed @@ -155,6 +155,7 @@ func newSubprocess(create func() (*thread, error)) (*subprocess, error) { errChan <- err return } + firstThread.grabInitRegs() // Ready to handle requests. errChan <- nil @@ -179,6 +180,7 @@ func newSubprocess(create func() (*thread, error)) (*subprocess, error) { // Detach the thread. t.detach() + t.initRegs = firstThread.initRegs // Return the thread. 
r <- t @@ -253,7 +255,7 @@ func (s *subprocess) newThread() *thread { return t } -// attach attachs to the thread. +// attach attaches to the thread. func (t *thread) attach() { if _, _, errno := syscall.RawSyscall(syscall.SYS_PTRACE, syscall.PTRACE_ATTACH, uintptr(t.tid), 0); errno != 0 { panic(fmt.Sprintf("unable to attach: %v", errno)) @@ -269,7 +271,9 @@ func (t *thread) attach() { // Initialize options. t.init() +} +func (t *thread) grabInitRegs() { // Grab registers. // // Note that we adjust the current register RIP value to be just before @@ -281,9 +285,9 @@ func (t *thread) attach() { t.initRegs.Rip -= initRegsRipAdjustment } -// detach detachs from the thread. +// detach detaches from the thread. // -// Because the SIGSTOP is not supressed, the thread will enter group-stop. +// Because the SIGSTOP is not suppressed, the thread will enter group-stop. func (t *thread) detach() { if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_DETACH, uintptr(t.tid), 0, uintptr(syscall.SIGSTOP), 0, 0); errno != 0 { panic(fmt.Sprintf("can't detach new clone: %v", errno)) @@ -370,7 +374,7 @@ func (t *thread) destroy() { // init initializes trace options. func (t *thread) init() { - // Set our TRACESYSGOOD option to differeniate real SIGTRAP. We also + // Set the TRACESYSGOOD option to differentiate real SIGTRAP. We also // set PTRACE_O_EXITKILL to ensure that the unexpected exit of the // sentry will immediately kill the associated stubs. const PTRACE_O_EXITKILL = 0x100000 @@ -554,7 +558,7 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool { if c.signalInfo.Code > 0 { // The signal was generated by the kernel. We inspect // the signal information, and may patch it in order to - // faciliate vsyscall emulation. See patchSignalInfo. + // facilitate vsyscall emulation. See patchSignalInfo. 
patchSignalInfo(regs, &c.signalInfo) return false } else if c.signalInfo.Code <= 0 && c.signalInfo.Pid() == int32(os.Getpid()) { diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go index 84d4cf0bd..87ded0bbd 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux.go @@ -235,6 +235,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro return nil, fmt.Errorf("wait failed: expected SIGSTOP, got %v", sig) } t.attach() + t.grabInitRegs() return t, nil } @@ -305,7 +306,7 @@ func (s *subprocess) createStub() (*thread, error) { arch.SyscallArgument{Value: 0}, arch.SyscallArgument{Value: 0}) if err != nil { - return nil, err + return nil, fmt.Errorf("creating stub process: %v", err) } // Wait for child to enter group-stop, so we don't stop its @@ -324,7 +325,7 @@ func (s *subprocess) createStub() (*thread, error) { arch.SyscallArgument{Value: 0}, arch.SyscallArgument{Value: 0}) if err != nil { - return nil, err + return nil, fmt.Errorf("waiting on stub process: %v", err) } childT := &thread{ diff --git a/pkg/sentry/platform/ring0/entry_amd64.s b/pkg/sentry/platform/ring0/entry_amd64.s index 8cb8c4996..02df38331 100644 --- a/pkg/sentry/platform/ring0/entry_amd64.s +++ b/pkg/sentry/platform/ring0/entry_amd64.s @@ -15,7 +15,7 @@ #include "funcdata.h" #include "textflag.h" -// NB: Offsets are programatically generated (see BUILD). +// NB: Offsets are programmatically generated (see BUILD). // // This file is concatenated with the definitions. 
diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go index 3577b5127..0feff8778 100644 --- a/pkg/sentry/platform/ring0/kernel_amd64.go +++ b/pkg/sentry/platform/ring0/kernel_amd64.go @@ -70,6 +70,14 @@ func (c *CPU) init() { c.tss.ist1Lo = uint32(stackAddr) c.tss.ist1Hi = uint32(stackAddr >> 32) + // Set the I/O bitmap base address beyond the last byte in the TSS + // to block access to the entire I/O address range. + // + // From section 18.5.2 "I/O Permission Bit Map" from Intel SDM vol1: + // I/O addresses not spanned by the map are treated as if they had set + // bits in the map. + c.tss.ioPerm = tssLimit + 1 + // Permanently set the kernel segments. c.registers.Cs = uint64(Kcode) c.registers.Ds = uint64(Kdata) |