summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/platform
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/platform')
-rw-r--r--pkg/sentry/platform/BUILD1
-rw-r--r--pkg/sentry/platform/kvm/BUILD2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go2
-rw-r--r--pkg/sentry/platform/kvm/filters.go33
-rw-r--r--pkg/sentry/platform/kvm/kvm.go14
-rw-r--r--pkg/sentry/platform/kvm/machine.go9
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64_unsafe.go2
-rw-r--r--pkg/sentry/platform/kvm/machine_unsafe.go3
-rw-r--r--pkg/sentry/platform/platform.go30
-rw-r--r--pkg/sentry/platform/ptrace/BUILD1
-rw-r--r--pkg/sentry/platform/ptrace/filters.go33
-rw-r--r--pkg/sentry/platform/ptrace/ptrace.go15
-rw-r--r--pkg/sentry/platform/ptrace/subprocess.go16
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_linux.go5
-rw-r--r--pkg/sentry/platform/ring0/entry_amd64.s2
-rw-r--r--pkg/sentry/platform/ring0/kernel_amd64.go8
16 files changed, 161 insertions, 15 deletions
diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD
index 0b9962b2b..9aa6ec507 100644
--- a/pkg/sentry/platform/BUILD
+++ b/pkg/sentry/platform/BUILD
@@ -28,6 +28,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/atomicbitops",
"//pkg/log",
+ "//pkg/seccomp",
"//pkg/sentry/arch",
"//pkg/sentry/context",
"//pkg/sentry/platform/safecopy",
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index 9ccf77fdf..ad8b95744 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -14,6 +14,7 @@ go_library(
"bluepill_fault.go",
"bluepill_unsafe.go",
"context.go",
+ "filters.go",
"kvm.go",
"kvm_amd64.go",
"kvm_amd64_unsafe.go",
@@ -33,6 +34,7 @@ go_library(
"//pkg/cpuid",
"//pkg/log",
"//pkg/procid",
+ "//pkg/seccomp",
"//pkg/sentry/arch",
"//pkg/sentry/platform",
"//pkg/sentry/platform/interrupt",
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
index 0effd33ac..9d8af143e 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
@@ -30,7 +30,7 @@ func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 {
return &((*arch.UContext64)(context).MContext)
}
-// dieArchSetup initialies the state for dieTrampoline.
+// dieArchSetup initializes the state for dieTrampoline.
//
// The amd64 dieTrampoline requires the vCPU to be set in BX, and the last RIP
// to be in AX. The trampoline then simulates a call to dieHandler from the
diff --git a/pkg/sentry/platform/kvm/filters.go b/pkg/sentry/platform/kvm/filters.go
new file mode 100644
index 000000000..7d949f1dd
--- /dev/null
+++ b/pkg/sentry/platform/kvm/filters.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kvm
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+// SyscallFilters returns syscalls made exclusively by the KVM platform.
+func (*KVM) SyscallFilters() seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ syscall.SYS_ARCH_PRCTL: {},
+ syscall.SYS_IOCTL: {},
+ syscall.SYS_MMAP: {},
+ syscall.SYS_RT_SIGSUSPEND: {},
+ syscall.SYS_RT_SIGTIMEDWAIT: {},
+ 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host.
+ }
+}
diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go
index b49d7f3c4..ee4cd2f4d 100644
--- a/pkg/sentry/platform/kvm/kvm.go
+++ b/pkg/sentry/platform/kvm/kvm.go
@@ -141,3 +141,17 @@ func (k *KVM) NewContext() platform.Context {
machine: k.machine,
}
}
+
+type constructor struct{}
+
+func (*constructor) New(f *os.File) (platform.Platform, error) {
+ return New(f)
+}
+
+func (*constructor) OpenDevice() (*os.File, error) {
+ return OpenDevice()
+}
+
+func init() {
+ platform.Register("kvm", &constructor{})
+}
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 69b2f92a7..679087e25 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -135,7 +135,7 @@ type dieState struct {
// newVCPU creates a returns a new vCPU.
//
-// Precondtion: mu must be held.
+// Precondition: mu must be held.
func (m *machine) newVCPU() *vCPU {
id := len(m.vCPUs)
@@ -426,7 +426,12 @@ func (c *vCPU) unlock() {
// Normal state.
case vCPUUser | vCPUGuest | vCPUWaiter:
// Force a transition: this must trigger a notification when we
- // return from guest mode.
+ // return from guest mode. We must clear vCPUWaiter here
+ // anyways, because BounceToKernel will force a transition only
+ // from ring3 to ring0, which will not clear this bit. Halt may
+ // workaround the issue, but if there is no exception or
+ // syscall in this period, BounceToKernel will hang.
+ atomicbitops.AndUint32(&c.state, ^vCPUWaiter)
c.notify()
case vCPUUser | vCPUWaiter:
// Waiting for the lock to be released; the responsibility is
diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
index c87fa7b7c..506ec9af1 100644
--- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
@@ -87,7 +87,7 @@ func (c *vCPU) setCPUID() error {
// setSystemTime sets the TSC for the vCPU.
//
-// This has to make the call many times in order to minimize the intrinstic
+// This has to make the call many times in order to minimize the intrinsic
// error in the offset. Unfortunately KVM does not expose a relative offset via
// the API, so this is an approximation. We do this via an iterative algorithm.
// This has the advantage that it can generally deal with highly variable
diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go
index 8d76e106e..405e00292 100644
--- a/pkg/sentry/platform/kvm/machine_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_unsafe.go
@@ -21,6 +21,7 @@ package kvm
import (
"fmt"
+ "math"
"sync/atomic"
"syscall"
"unsafe"
@@ -134,7 +135,7 @@ func (c *vCPU) notify() {
syscall.SYS_FUTEX,
uintptr(unsafe.Pointer(&c.state)),
linux.FUTEX_WAKE|linux.FUTEX_PRIVATE_FLAG,
- ^uintptr(0), // Number of waiters.
+ math.MaxInt32, // Number of waiters.
0, 0, 0)
if errno != 0 {
throw("futex wake error")
diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go
index 5ad98a329..ec22dbf87 100644
--- a/pkg/sentry/platform/platform.go
+++ b/pkg/sentry/platform/platform.go
@@ -19,8 +19,10 @@ package platform
import (
"fmt"
+ "os"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/safemem"
"gvisor.dev/gvisor/pkg/sentry/usermem"
@@ -93,6 +95,9 @@ type Platform interface {
// Platforms for which this does not hold may panic if PreemptAllCPUs is
// called.
PreemptAllCPUs() error
+
+ // SyscallFilters returns syscalls made exclusively by this platform.
+ SyscallFilters() seccomp.SyscallRules
}
// NoCPUPreemptionDetection implements Platform.DetectsCPUPreemption and
@@ -256,7 +261,7 @@ type AddressSpaceIO interface {
LoadUint32(addr usermem.Addr) (uint32, error)
}
-// NoAddressSpaceIO implements AddressSpaceIO methods by panicing.
+// NoAddressSpaceIO implements AddressSpaceIO methods by panicking.
type NoAddressSpaceIO struct{}
// CopyOut implements AddressSpaceIO.CopyOut.
@@ -347,3 +352,26 @@ type File interface {
func (fr FileRange) String() string {
return fmt.Sprintf("[%#x, %#x)", fr.Start, fr.End)
}
+
+// Constructor represents a platform type.
+type Constructor interface {
+ New(deviceFile *os.File) (Platform, error)
+ OpenDevice() (*os.File, error)
+}
+
+// platforms contains all available platform types.
+var platforms = map[string]Constructor{}
+
+// Register registers a new platform type.
+func Register(name string, platform Constructor) {
+ platforms[name] = platform
+}
+
+// Lookup looks up the platform constructor by name.
+func Lookup(name string) (Constructor, error) {
+ p, ok := platforms[name]
+ if !ok {
+ return nil, fmt.Errorf("unknown platform: %v", name)
+ }
+ return p, nil
+}
diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD
index 6a1343f47..1b6c54e96 100644
--- a/pkg/sentry/platform/ptrace/BUILD
+++ b/pkg/sentry/platform/ptrace/BUILD
@@ -5,6 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "ptrace",
srcs = [
+ "filters.go",
"ptrace.go",
"ptrace_unsafe.go",
"stub_amd64.s",
diff --git a/pkg/sentry/platform/ptrace/filters.go b/pkg/sentry/platform/ptrace/filters.go
new file mode 100644
index 000000000..1e07cfd0d
--- /dev/null
+++ b/pkg/sentry/platform/ptrace/filters.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ptrace
+
+import (
+ "syscall"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+// SyscallFilters returns syscalls made exclusively by the ptrace platform.
+func (*PTrace) SyscallFilters() seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ unix.SYS_GETCPU: {},
+ unix.SYS_SCHED_SETAFFINITY: {},
+ syscall.SYS_PTRACE: {},
+ syscall.SYS_TGKILL: {},
+ syscall.SYS_WAIT4: {},
+ }
+}
diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go
index ee7e0640c..6fd30ed25 100644
--- a/pkg/sentry/platform/ptrace/ptrace.go
+++ b/pkg/sentry/platform/ptrace/ptrace.go
@@ -45,6 +45,7 @@
package ptrace
import (
+ "os"
"sync"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -236,3 +237,17 @@ func (p *PTrace) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan s
func (*PTrace) NewContext() platform.Context {
return &context{}
}
+
+type constructor struct{}
+
+func (*constructor) New(*os.File) (platform.Platform, error) {
+ return New()
+}
+
+func (*constructor) OpenDevice() (*os.File, error) {
+ return nil, nil
+}
+
+func init() {
+ platform.Register("ptrace", &constructor{})
+}
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index f15b3415a..15e84735e 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -123,7 +123,7 @@ type subprocess struct {
contexts map[*context]struct{}
}
-// newSubprocess returns a useable subprocess.
+// newSubprocess returns a usable subprocess.
//
// This will either be a newly created subprocess, or one from the global pool.
// The create function will be called in the latter case, which is guaranteed
@@ -155,6 +155,7 @@ func newSubprocess(create func() (*thread, error)) (*subprocess, error) {
errChan <- err
return
}
+ firstThread.grabInitRegs()
// Ready to handle requests.
errChan <- nil
@@ -179,6 +180,7 @@ func newSubprocess(create func() (*thread, error)) (*subprocess, error) {
// Detach the thread.
t.detach()
+ t.initRegs = firstThread.initRegs
// Return the thread.
r <- t
@@ -253,7 +255,7 @@ func (s *subprocess) newThread() *thread {
return t
}
-// attach attachs to the thread.
+// attach attaches to the thread.
func (t *thread) attach() {
if _, _, errno := syscall.RawSyscall(syscall.SYS_PTRACE, syscall.PTRACE_ATTACH, uintptr(t.tid), 0); errno != 0 {
panic(fmt.Sprintf("unable to attach: %v", errno))
@@ -269,7 +271,9 @@ func (t *thread) attach() {
// Initialize options.
t.init()
+}
+func (t *thread) grabInitRegs() {
// Grab registers.
//
// Note that we adjust the current register RIP value to be just before
@@ -281,9 +285,9 @@ func (t *thread) attach() {
t.initRegs.Rip -= initRegsRipAdjustment
}
-// detach detachs from the thread.
+// detach detaches from the thread.
//
-// Because the SIGSTOP is not supressed, the thread will enter group-stop.
+// Because the SIGSTOP is not suppressed, the thread will enter group-stop.
func (t *thread) detach() {
if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_DETACH, uintptr(t.tid), 0, uintptr(syscall.SIGSTOP), 0, 0); errno != 0 {
panic(fmt.Sprintf("can't detach new clone: %v", errno))
@@ -370,7 +374,7 @@ func (t *thread) destroy() {
// init initializes trace options.
func (t *thread) init() {
- // Set our TRACESYSGOOD option to differeniate real SIGTRAP. We also
+ // Set the TRACESYSGOOD option to differentiate real SIGTRAP.
// set PTRACE_O_EXITKILL to ensure that the unexpected exit of the
// sentry will immediately kill the associated stubs.
const PTRACE_O_EXITKILL = 0x100000
@@ -554,7 +558,7 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
if c.signalInfo.Code > 0 {
// The signal was generated by the kernel. We inspect
// the signal information, and may patch it in order to
- // faciliate vsyscall emulation. See patchSignalInfo.
+ // facilitate vsyscall emulation. See patchSignalInfo.
patchSignalInfo(regs, &c.signalInfo)
return false
} else if c.signalInfo.Code <= 0 && c.signalInfo.Pid() == int32(os.Getpid()) {
diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go
index 84d4cf0bd..87ded0bbd 100644
--- a/pkg/sentry/platform/ptrace/subprocess_linux.go
+++ b/pkg/sentry/platform/ptrace/subprocess_linux.go
@@ -235,6 +235,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
return nil, fmt.Errorf("wait failed: expected SIGSTOP, got %v", sig)
}
t.attach()
+ t.grabInitRegs()
return t, nil
}
@@ -305,7 +306,7 @@ func (s *subprocess) createStub() (*thread, error) {
arch.SyscallArgument{Value: 0},
arch.SyscallArgument{Value: 0})
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("creating stub process: %v", err)
}
// Wait for child to enter group-stop, so we don't stop its
@@ -324,7 +325,7 @@ func (s *subprocess) createStub() (*thread, error) {
arch.SyscallArgument{Value: 0},
arch.SyscallArgument{Value: 0})
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("waiting on stub process: %v", err)
}
childT := &thread{
diff --git a/pkg/sentry/platform/ring0/entry_amd64.s b/pkg/sentry/platform/ring0/entry_amd64.s
index 8cb8c4996..02df38331 100644
--- a/pkg/sentry/platform/ring0/entry_amd64.s
+++ b/pkg/sentry/platform/ring0/entry_amd64.s
@@ -15,7 +15,7 @@
#include "funcdata.h"
#include "textflag.h"
-// NB: Offsets are programatically generated (see BUILD).
+// NB: Offsets are programmatically generated (see BUILD).
//
// This file is concatenated with the definitions.
diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go
index 3577b5127..0feff8778 100644
--- a/pkg/sentry/platform/ring0/kernel_amd64.go
+++ b/pkg/sentry/platform/ring0/kernel_amd64.go
@@ -70,6 +70,14 @@ func (c *CPU) init() {
c.tss.ist1Lo = uint32(stackAddr)
c.tss.ist1Hi = uint32(stackAddr >> 32)
+ // Set the I/O bitmap base address beyond the last byte in the TSS
+ // to block access to the entire I/O address range.
+ //
+ // From section 18.5.2 "I/O Permission Bit Map" from Intel SDM vol1:
+ // I/O addresses not spanned by the map are treated as if they had set
+ // bits in the map.
+ c.tss.ioPerm = tssLimit + 1
+
// Permanently set the kernel segments.
c.registers.Cs = uint64(Kcode)
c.registers.Ds = uint64(Kdata)