summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/platform/ptrace
diff options
context:
space:
mode:
authorkevin.xu <cming.xu@gmail.com>2020-04-27 21:51:31 +0800
committerGitHub <noreply@github.com>2020-04-27 21:51:31 +0800
commite896ca54db67524afc20b644d43c72185e72dc0e (patch)
tree2a16f3a62a5cafd098f1f028c621f1b655589d69 /pkg/sentry/platform/ptrace
parent1f19624fa127d7d59cabe29593cc80b7fe6c81f8 (diff)
parent3c67754663f424f2ebbc0ff2a4c80e30618d5355 (diff)
Merge pull request #1 from google/master
catch up
Diffstat (limited to 'pkg/sentry/platform/ptrace')
-rw-r--r--pkg/sentry/platform/ptrace/BUILD10
-rw-r--r--pkg/sentry/platform/ptrace/ptrace.go17
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_amd64.go19
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_arm64.go5
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go62
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_unsafe.go6
-rw-r--r--pkg/sentry/platform/ptrace/stub_amd64.s29
-rw-r--r--pkg/sentry/platform/ptrace/stub_arm64.s30
-rw-r--r--pkg/sentry/platform/ptrace/stub_unsafe.go4
-rw-r--r--pkg/sentry/platform/ptrace/subprocess.go54
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_amd64.go130
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_arm64.go62
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_linux.go93
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go23
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_unsafe.go2
15 files changed, 357 insertions, 189 deletions
diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD
index ebcc8c098..30402c2df 100644
--- a/pkg/sentry/platform/ptrace/BUILD
+++ b/pkg/sentry/platform/ptrace/BUILD
@@ -1,4 +1,4 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
@@ -9,6 +9,7 @@ go_library(
"ptrace.go",
"ptrace_amd64.go",
"ptrace_arm64.go",
+ "ptrace_arm64_unsafe.go",
"ptrace_unsafe.go",
"stub_amd64.s",
"stub_arm64.s",
@@ -20,18 +21,19 @@ go_library(
"subprocess_linux_unsafe.go",
"subprocess_unsafe.go",
],
- importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ptrace",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/log",
"//pkg/procid",
+ "//pkg/safecopy",
"//pkg/seccomp",
"//pkg/sentry/arch",
+ "//pkg/sentry/hostcpu",
"//pkg/sentry/platform",
"//pkg/sentry/platform/interrupt",
- "//pkg/sentry/platform/safecopy",
- "//pkg/sentry/usermem",
+ "//pkg/sync",
+ "//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go
index 7b120a15d..08d055e05 100644
--- a/pkg/sentry/platform/ptrace/ptrace.go
+++ b/pkg/sentry/platform/ptrace/ptrace.go
@@ -46,13 +46,13 @@ package ptrace
import (
"os"
- "sync"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/interrupt"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/usermem"
)
var (
@@ -177,6 +177,9 @@ func (c *context) Interrupt() {
c.interrupt.NotifyInterrupt()
}
+// Release implements platform.Context.Release().
+func (c *context) Release() {}
+
// PTrace represents a collection of ptrace subprocesses.
type PTrace struct {
platform.MMapMinAddr
@@ -248,6 +251,16 @@ func (*constructor) OpenDevice() (*os.File, error) {
return nil, nil
}
+// Flags implements platform.Constructor.Flags().
+func (*constructor) Requirements() platform.Requirements {
+ // TODO(b/75837838): Also set a new PID namespace so that we limit
+ // access to other host processes.
+ return platform.Requirements{
+ RequiresCapSysPtrace: true,
+ RequiresCurrentPIDNS: true,
+ }
+}
+
func init() {
platform.Register("ptrace", &constructor{})
}
diff --git a/pkg/sentry/platform/ptrace/ptrace_amd64.go b/pkg/sentry/platform/ptrace/ptrace_amd64.go
index db0212538..3b9a870a5 100644
--- a/pkg/sentry/platform/ptrace/ptrace_amd64.go
+++ b/pkg/sentry/platform/ptrace/ptrace_amd64.go
@@ -15,9 +15,8 @@
package ptrace
import (
- "syscall"
-
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// fpRegSet returns the GETREGSET/SETREGSET register set type to be used.
@@ -28,6 +27,20 @@ func fpRegSet(useXsave bool) uintptr {
return linux.NT_PRFPREG
}
-func stackPointer(r *syscall.PtraceRegs) uintptr {
+func stackPointer(r *arch.Registers) uintptr {
return uintptr(r.Rsp)
}
+
+// x86 use the fs_base register to store the TLS pointer which can be
+// get/set in "func (t *thread) get/setRegs(regs *arch.Registers)".
+// So both of the get/setTLS() operations are noop here.
+
+// getTLS gets the thread local storage register.
+func (t *thread) getTLS(tls *uint64) error {
+ return nil
+}
+
+// setTLS sets the thread local storage register.
+func (t *thread) setTLS(tls *uint64) error {
+ return nil
+}
diff --git a/pkg/sentry/platform/ptrace/ptrace_arm64.go b/pkg/sentry/platform/ptrace/ptrace_arm64.go
index 4db28c534..5c869926a 100644
--- a/pkg/sentry/platform/ptrace/ptrace_arm64.go
+++ b/pkg/sentry/platform/ptrace/ptrace_arm64.go
@@ -15,9 +15,8 @@
package ptrace
import (
- "syscall"
-
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// fpRegSet returns the GETREGSET/SETREGSET register set type to be used.
@@ -25,6 +24,6 @@ func fpRegSet(_ bool) uintptr {
return linux.NT_PRFPREG
}
-func stackPointer(r *syscall.PtraceRegs) uintptr {
+func stackPointer(r *arch.Registers) uintptr {
return uintptr(r.Sp)
}
diff --git a/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go
new file mode 100644
index 000000000..32b8a6be9
--- /dev/null
+++ b/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go
@@ -0,0 +1,62 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package ptrace
+
+import (
+ "syscall"
+ "unsafe"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+)
+
+// getTLS gets the thread local storage register.
+func (t *thread) getTLS(tls *uint64) error {
+ iovec := syscall.Iovec{
+ Base: (*byte)(unsafe.Pointer(tls)),
+ Len: uint64(unsafe.Sizeof(*tls)),
+ }
+ _, _, errno := syscall.RawSyscall6(
+ syscall.SYS_PTRACE,
+ syscall.PTRACE_GETREGSET,
+ uintptr(t.tid),
+ linux.NT_ARM_TLS,
+ uintptr(unsafe.Pointer(&iovec)),
+ 0, 0)
+ if errno != 0 {
+ return errno
+ }
+ return nil
+}
+
+// setTLS sets the thread local storage register.
+func (t *thread) setTLS(tls *uint64) error {
+ iovec := syscall.Iovec{
+ Base: (*byte)(unsafe.Pointer(tls)),
+ Len: uint64(unsafe.Sizeof(*tls)),
+ }
+ _, _, errno := syscall.RawSyscall6(
+ syscall.SYS_PTRACE,
+ syscall.PTRACE_SETREGSET,
+ uintptr(t.tid),
+ linux.NT_ARM_TLS,
+ uintptr(unsafe.Pointer(&iovec)),
+ 0, 0)
+ if errno != 0 {
+ return errno
+ }
+ return nil
+}
diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
index 72c7ec564..8b72d24e8 100644
--- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go
+++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
@@ -20,11 +20,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// getRegs gets the general purpose register set.
-func (t *thread) getRegs(regs *syscall.PtraceRegs) error {
+func (t *thread) getRegs(regs *arch.Registers) error {
iovec := syscall.Iovec{
Base: (*byte)(unsafe.Pointer(regs)),
Len: uint64(unsafe.Sizeof(*regs)),
@@ -43,7 +43,7 @@ func (t *thread) getRegs(regs *syscall.PtraceRegs) error {
}
// setRegs sets the general purpose register set.
-func (t *thread) setRegs(regs *syscall.PtraceRegs) error {
+func (t *thread) setRegs(regs *arch.Registers) error {
iovec := syscall.Iovec{
Base: (*byte)(unsafe.Pointer(regs)),
Len: uint64(unsafe.Sizeof(*regs)),
diff --git a/pkg/sentry/platform/ptrace/stub_amd64.s b/pkg/sentry/platform/ptrace/stub_amd64.s
index 64c718d21..16f9c523e 100644
--- a/pkg/sentry/platform/ptrace/stub_amd64.s
+++ b/pkg/sentry/platform/ptrace/stub_amd64.s
@@ -64,6 +64,8 @@ begin:
CMPQ AX, $0
JL error
+ MOVQ $0, BX
+
// SIGSTOP to wait for attach.
//
// The SYSCALL instruction will be used for future syscall injection by
@@ -73,23 +75,26 @@ begin:
MOVQ $SIGSTOP, SI
SYSCALL
- // The tracer may "detach" and/or allow code execution here in three cases:
- //
- // 1. New (traced) stub threads are explicitly detached by the
- // goroutine in newSubprocess. However, they are detached while in
- // group-stop, so they do not execute code here.
- //
- // 2. If a tracer thread exits, it implicitly detaches from the stub,
- // potentially allowing code execution here. However, the Go runtime
- // never exits individual threads, so this case never occurs.
- //
- // 3. subprocess.createStub clones a new stub process that is untraced,
+ // The sentry sets BX to 1 when creating stub process.
+ CMPQ BX, $1
+ JE clone
+
+ // Notify the Sentry that syscall exited.
+done:
+ INT $3
+ // Be paranoid.
+ JMP done
+clone:
+ // subprocess.createStub clones a new stub process that is untraced,
// thus executing this code. We setup the PDEATHSIG before SIGSTOPing
// ourselves for attach by the tracer.
//
// R15 has been updated with the expected PPID.
- JMP begin
+ CMPQ AX, $0
+ JE begin
+ // The clone syscall returns a non-zero value.
+ JMP done
error:
// Exit with -errno.
MOVQ AX, DI
diff --git a/pkg/sentry/platform/ptrace/stub_arm64.s b/pkg/sentry/platform/ptrace/stub_arm64.s
index 2c5e4d5cb..6162df02a 100644
--- a/pkg/sentry/platform/ptrace/stub_arm64.s
+++ b/pkg/sentry/platform/ptrace/stub_arm64.s
@@ -59,6 +59,8 @@ begin:
CMP $0x0, R0
BLT error
+ MOVD $0, R9
+
// SIGSTOP to wait for attach.
//
// The SYSCALL instruction will be used for future syscall injection by
@@ -66,22 +68,26 @@ begin:
MOVD $SYS_KILL, R8
MOVD $SIGSTOP, R1
SVC
- // The tracer may "detach" and/or allow code execution here in three cases:
- //
- // 1. New (traced) stub threads are explicitly detached by the
- // goroutine in newSubprocess. However, they are detached while in
- // group-stop, so they do not execute code here.
- //
- // 2. If a tracer thread exits, it implicitly detaches from the stub,
- // potentially allowing code execution here. However, the Go runtime
- // never exits individual threads, so this case never occurs.
- //
- // 3. subprocess.createStub clones a new stub process that is untraced,
+
+ // The sentry sets R9 to 1 when creating stub process.
+ CMP $1, R9
+ BEQ clone
+
+done:
+ // Notify the Sentry that syscall exited.
+ BRK $3
+ B done // Be paranoid.
+clone:
+ // subprocess.createStub clones a new stub process that is untraced,
// thus executing this code. We setup the PDEATHSIG before SIGSTOPing
// ourselves for attach by the tracer.
//
// R7 has been updated with the expected PPID.
- B begin
+ CMP $0, R0
+ BEQ begin
+
+ // The clone system call returned a non-zero value.
+ B done
error:
// Exit with -errno.
diff --git a/pkg/sentry/platform/ptrace/stub_unsafe.go b/pkg/sentry/platform/ptrace/stub_unsafe.go
index aa1b87237..341dde143 100644
--- a/pkg/sentry/platform/ptrace/stub_unsafe.go
+++ b/pkg/sentry/platform/ptrace/stub_unsafe.go
@@ -19,8 +19,8 @@ import (
"syscall"
"unsafe"
- "gvisor.dev/gvisor/pkg/sentry/platform/safecopy"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/safecopy"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// stub is defined in arch-specific assembly.
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index b699b057d..2389423b0 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -18,14 +18,15 @@ import (
"fmt"
"os"
"runtime"
- "sync"
"syscall"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/procid"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// Linux kernel errnos which "should never be seen by user programs", but will
@@ -62,7 +63,7 @@ type thread struct {
// initRegs are the initial registers for the first thread.
//
// These are used for the register set for system calls.
- initRegs syscall.PtraceRegs
+ initRegs arch.Registers
}
// threadPool is a collection of threads.
@@ -316,7 +317,7 @@ const (
)
func (t *thread) dumpAndPanic(message string) {
- var regs syscall.PtraceRegs
+ var regs arch.Registers
message += "\n"
if err := t.getRegs(&regs); err == nil {
message += dumpRegs(&regs)
@@ -331,11 +332,12 @@ func (t *thread) unexpectedStubExit() {
msg, err := t.getEventMessage()
status := syscall.WaitStatus(msg)
if status.Signaled() && status.Signal() == syscall.SIGKILL {
- // SIGKILL can be only sent by an user or OOM-killer. In both
+ // SIGKILL can be only sent by a user or OOM-killer. In both
// these cases, we don't need to panic. There is no reasons to
// think that something wrong in gVisor.
log.Warningf("The ptrace stub process %v has been killed by SIGKILL.", t.tgid)
- syscall.Kill(os.Getpid(), syscall.SIGKILL)
+ pid := os.Getpid()
+ syscall.Tgkill(pid, pid, syscall.Signal(syscall.SIGKILL))
}
t.dumpAndPanic(fmt.Sprintf("wait failed: the process %d:%d exited: %x (err %v)", t.tgid, t.tid, msg, err))
}
@@ -421,20 +423,22 @@ func (t *thread) init() {
// This is _not_ for use by application system calls, rather it is for use when
// a system call must be injected into the remote context (e.g. mmap, munmap).
// Note that clones are handled separately.
-func (t *thread) syscall(regs *syscall.PtraceRegs) (uintptr, error) {
+func (t *thread) syscall(regs *arch.Registers) (uintptr, error) {
// Set registers.
if err := t.setRegs(regs); err != nil {
panic(fmt.Sprintf("ptrace set regs failed: %v", err))
}
for {
- // Execute the syscall instruction.
- if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_SYSCALL, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
+ // Execute the syscall instruction. The task has to stop on the
+ // trap instruction which is right after the syscall
+ // instruction.
+ if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_CONT, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
}
sig := t.wait(stopped)
- if sig == (syscallEvent | syscall.SIGTRAP) {
+ if sig == syscall.SIGTRAP {
// Reached syscall-enter-stop.
break
} else {
@@ -446,18 +450,6 @@ func (t *thread) syscall(regs *syscall.PtraceRegs) (uintptr, error) {
}
}
- // Complete the actual system call.
- if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_SYSCALL, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
- panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
- }
-
- // Wait for syscall-exit-stop. "[Signal-delivery-stop] never happens
- // between syscall-enter-stop and syscall-exit-stop; it happens *after*
- // syscall-exit-stop.)" - ptrace(2), "Syscall-stops"
- if sig := t.wait(stopped); sig != (syscallEvent | syscall.SIGTRAP) {
- t.dumpAndPanic(fmt.Sprintf("wait failed: expected SIGTRAP, got %v [%d]", sig, sig))
- }
-
// Grab registers.
if err := t.getRegs(regs); err != nil {
panic(fmt.Sprintf("ptrace get regs failed: %v", err))
@@ -469,7 +461,7 @@ func (t *thread) syscall(regs *syscall.PtraceRegs) (uintptr, error) {
// syscallIgnoreInterrupt ignores interrupts on the system call thread and
// restarts the syscall if the kernel indicates that should happen.
func (t *thread) syscallIgnoreInterrupt(
- initRegs *syscall.PtraceRegs,
+ initRegs *arch.Registers,
sysno uintptr,
args ...arch.SyscallArgument) (uintptr, error) {
for {
@@ -514,6 +506,9 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
regs := &ac.StateData().Regs
t.resetSysemuRegs(regs)
+ // Extract TLS register
+ tls := uint64(ac.TLS())
+
// Check for interrupts, and ensure that future interrupts will signal t.
if !c.interrupt.Enable(t) {
// Pending interrupt; simulate.
@@ -534,20 +529,23 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
if err := t.setFPRegs(fpState, uint64(fpLen), useXsave); err != nil {
panic(fmt.Sprintf("ptrace set fpregs (%+v) failed: %v", fpState, err))
}
+ if err := t.setTLS(&tls); err != nil {
+ panic(fmt.Sprintf("ptrace set tls (%+v) failed: %v", tls, err))
+ }
for {
// Start running until the next system call.
if isSingleStepping(regs) {
if _, _, errno := syscall.RawSyscall6(
syscall.SYS_PTRACE,
- syscall.PTRACE_SYSEMU_SINGLESTEP,
+ unix.PTRACE_SYSEMU_SINGLESTEP,
uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
panic(fmt.Sprintf("ptrace sysemu failed: %v", errno))
}
} else {
if _, _, errno := syscall.RawSyscall6(
syscall.SYS_PTRACE,
- syscall.PTRACE_SYSEMU,
+ unix.PTRACE_SYSEMU,
uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
panic(fmt.Sprintf("ptrace sysemu failed: %v", errno))
}
@@ -563,6 +561,12 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
if err := t.getFPRegs(fpState, uint64(fpLen), useXsave); err != nil {
panic(fmt.Sprintf("ptrace get fpregs failed: %v", err))
}
+ if err := t.getTLS(&tls); err != nil {
+ panic(fmt.Sprintf("ptrace get tls failed: %v", err))
+ }
+ if !ac.SetTLS(uintptr(tls)) {
+ panic(fmt.Sprintf("tls value %v is invalid", tls))
+ }
// Is it a system call?
if sig == (syscallEvent | syscall.SIGTRAP) {
diff --git a/pkg/sentry/platform/ptrace/subprocess_amd64.go b/pkg/sentry/platform/ptrace/subprocess_amd64.go
index 4649a94a7..84b699f0d 100644
--- a/pkg/sentry/platform/ptrace/subprocess_amd64.go
+++ b/pkg/sentry/platform/ptrace/subprocess_amd64.go
@@ -21,6 +21,9 @@ import (
"strings"
"syscall"
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
"gvisor.dev/gvisor/pkg/sentry/arch"
)
@@ -38,7 +41,7 @@ const (
// resetSysemuRegs sets up emulation registers.
//
// This should be called prior to calling sysemu.
-func (t *thread) resetSysemuRegs(regs *syscall.PtraceRegs) {
+func (t *thread) resetSysemuRegs(regs *arch.Registers) {
regs.Cs = t.initRegs.Cs
regs.Ss = t.initRegs.Ss
regs.Ds = t.initRegs.Ds
@@ -50,7 +53,7 @@ func (t *thread) resetSysemuRegs(regs *syscall.PtraceRegs) {
// createSyscallRegs sets up syscall registers.
//
// This should be called to generate registers for a system call.
-func createSyscallRegs(initRegs *syscall.PtraceRegs, sysno uintptr, args ...arch.SyscallArgument) syscall.PtraceRegs {
+func createSyscallRegs(initRegs *arch.Registers, sysno uintptr, args ...arch.SyscallArgument) arch.Registers {
// Copy initial registers.
regs := *initRegs
@@ -79,18 +82,18 @@ func createSyscallRegs(initRegs *syscall.PtraceRegs, sysno uintptr, args ...arch
}
// isSingleStepping determines if the registers indicate single-stepping.
-func isSingleStepping(regs *syscall.PtraceRegs) bool {
+func isSingleStepping(regs *arch.Registers) bool {
return (regs.Eflags & arch.X86TrapFlag) != 0
}
// updateSyscallRegs updates registers after finishing sysemu.
-func updateSyscallRegs(regs *syscall.PtraceRegs) {
+func updateSyscallRegs(regs *arch.Registers) {
// Ptrace puts -ENOSYS in rax on syscall-enter-stop.
regs.Rax = regs.Orig_rax
}
// syscallReturnValue extracts a sensible return from registers.
-func syscallReturnValue(regs *syscall.PtraceRegs) (uintptr, error) {
+func syscallReturnValue(regs *arch.Registers) (uintptr, error) {
rval := int64(regs.Rax)
if rval < 0 {
return 0, syscall.Errno(-rval)
@@ -98,7 +101,7 @@ func syscallReturnValue(regs *syscall.PtraceRegs) (uintptr, error) {
return uintptr(rval), nil
}
-func dumpRegs(regs *syscall.PtraceRegs) string {
+func dumpRegs(regs *arch.Registers) string {
var m strings.Builder
fmt.Fprintf(&m, "Registers:\n")
@@ -139,7 +142,118 @@ func (t *thread) adjustInitRegsRip() {
t.initRegs.Rip -= initRegsRipAdjustment
}
-// Pass the expected PPID to the child via R15 when creating stub process
-func initChildProcessPPID(initregs *syscall.PtraceRegs, ppid int32) {
+// Pass the expected PPID to the child via R15 when creating stub process.
+func initChildProcessPPID(initregs *arch.Registers, ppid int32) {
initregs.R15 = uint64(ppid)
+ // Rbx has to be set to 1 when creating stub process.
+ initregs.Rbx = 1
+}
+
+// patchSignalInfo patches the signal info to account for hitting the seccomp
+// filters from vsyscall emulation, specified below. We allow for SIGSYS as a
+// synchronous trap, but patch the structure to appear like a SIGSEGV with the
+// Rip as the faulting address.
+//
+// Note that this should only be called after verifying that the signalInfo has
+// been generated by the kernel.
+func patchSignalInfo(regs *arch.Registers, signalInfo *arch.SignalInfo) {
+ if linux.Signal(signalInfo.Signo) == linux.SIGSYS {
+ signalInfo.Signo = int32(linux.SIGSEGV)
+
+ // Unwind the kernel emulation, if any has occurred. A SIGSYS is delivered
+ // with the si_call_addr field pointing to the current RIP. This field
+ // aligns with the si_addr field for a SIGSEGV, so we don't need to touch
+ // anything there. We do need to unwind emulation however, so we set the
+ // instruction pointer to the faulting value, and "unpop" the stack.
+ regs.Rip = signalInfo.Addr()
+ regs.Rsp -= 8
+ }
+}
+
+// enableCpuidFault enables cpuid-faulting.
+//
+// This may fail on older kernels or hardware, so we just disregard the result.
+// Host CPUID will be enabled.
+//
+// This is safe to call in an afterFork context.
+//
+//go:nosplit
+func enableCpuidFault() {
+ syscall.RawSyscall6(syscall.SYS_ARCH_PRCTL, linux.ARCH_SET_CPUID, 0, 0, 0, 0, 0)
+}
+
+// appendArchSeccompRules append architecture specific seccomp rules when creating BPF program.
+// Ref attachedThread() for more detail.
+func appendArchSeccompRules(rules []seccomp.RuleSet, defaultAction linux.BPFAction) []seccomp.RuleSet {
+ rules = append(rules,
+ // Rules for trapping vsyscall access.
+ seccomp.RuleSet{
+ Rules: seccomp.SyscallRules{
+ syscall.SYS_GETTIMEOFDAY: {},
+ syscall.SYS_TIME: {},
+ unix.SYS_GETCPU: {}, // SYS_GETCPU was not defined in package syscall on amd64.
+ },
+ Action: linux.SECCOMP_RET_TRAP,
+ Vsyscall: true,
+ })
+ if defaultAction != linux.SECCOMP_RET_ALLOW {
+ rules = append(rules,
+ seccomp.RuleSet{
+ Rules: seccomp.SyscallRules{
+ syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
+ {seccomp.AllowValue(linux.ARCH_SET_CPUID), seccomp.AllowValue(0)},
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ })
+ }
+ return rules
+}
+
+// probeSeccomp returns true iff seccomp is run after ptrace notifications,
+// which is generally the case for kernel version >= 4.8. This check is dynamic
+// because kernels have be backported behavior.
+//
+// See createStub for more information.
+//
+// Precondition: the runtime OS thread must be locked.
+func probeSeccomp() bool {
+ // Create a completely new, destroyable process.
+ t, err := attachedThread(0, linux.SECCOMP_RET_ERRNO)
+ if err != nil {
+ panic(fmt.Sprintf("seccomp probe failed: %v", err))
+ }
+ defer t.destroy()
+
+ // Set registers to the yield system call. This call is not allowed
+ // by the filters specified in the attachThread function.
+ regs := createSyscallRegs(&t.initRegs, syscall.SYS_SCHED_YIELD)
+ if err := t.setRegs(&regs); err != nil {
+ panic(fmt.Sprintf("ptrace set regs failed: %v", err))
+ }
+
+ for {
+ // Attempt an emulation.
+ if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, unix.PTRACE_SYSEMU, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
+ panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
+ }
+
+ sig := t.wait(stopped)
+ if sig == (syscallEvent | syscall.SIGTRAP) {
+ // Did the seccomp errno hook already run? This would
+ // indicate that seccomp is first in line and we're
+ // less than 4.8.
+ if err := t.getRegs(&regs); err != nil {
+ panic(fmt.Sprintf("ptrace get-regs failed: %v", err))
+ }
+ if _, err := syscallReturnValue(&regs); err == nil {
+ // The seccomp errno mode ran first, and reset
+ // the error in the registers.
+ return false
+ }
+ // The seccomp hook did not run yet, and therefore it
+ // is safe to use RET_KILL mode for dispatched calls.
+ return true
+ }
+ }
}
diff --git a/pkg/sentry/platform/ptrace/subprocess_arm64.go b/pkg/sentry/platform/ptrace/subprocess_arm64.go
index bec884ba5..bd618fae8 100644
--- a/pkg/sentry/platform/ptrace/subprocess_arm64.go
+++ b/pkg/sentry/platform/ptrace/subprocess_arm64.go
@@ -17,8 +17,12 @@
package ptrace
import (
+ "fmt"
+ "strings"
"syscall"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
"gvisor.dev/gvisor/pkg/sentry/arch"
)
@@ -37,13 +41,13 @@ const (
// resetSysemuRegs sets up emulation registers.
//
// This should be called prior to calling sysemu.
-func (s *subprocess) resetSysemuRegs(regs *syscall.PtraceRegs) {
+func (t *thread) resetSysemuRegs(regs *arch.Registers) {
}
// createSyscallRegs sets up syscall registers.
//
// This should be called to generate registers for a system call.
-func createSyscallRegs(initRegs *syscall.PtraceRegs, sysno uintptr, args ...arch.SyscallArgument) syscall.PtraceRegs {
+func createSyscallRegs(initRegs *arch.Registers, sysno uintptr, args ...arch.SyscallArgument) arch.Registers {
// Copy initial registers (Pc, Sp, etc.).
regs := *initRegs
@@ -74,7 +78,7 @@ func createSyscallRegs(initRegs *syscall.PtraceRegs, sysno uintptr, args ...arch
}
// isSingleStepping determines if the registers indicate single-stepping.
-func isSingleStepping(regs *syscall.PtraceRegs) bool {
+func isSingleStepping(regs *arch.Registers) bool {
// Refer to the ARM SDM D2.12.3: software step state machine
// return (regs.Pstate.SS == 1) && (MDSCR_EL1.SS == 1).
//
@@ -85,13 +89,13 @@ func isSingleStepping(regs *syscall.PtraceRegs) bool {
}
// updateSyscallRegs updates registers after finishing sysemu.
-func updateSyscallRegs(regs *syscall.PtraceRegs) {
+func updateSyscallRegs(regs *arch.Registers) {
// No special work is necessary.
return
}
// syscallReturnValue extracts a sensible return from registers.
-func syscallReturnValue(regs *syscall.PtraceRegs) (uintptr, error) {
+func syscallReturnValue(regs *arch.Registers) (uintptr, error) {
rval := int64(regs.Regs[0])
if rval < 0 {
return 0, syscall.Errno(-rval)
@@ -99,7 +103,7 @@ func syscallReturnValue(regs *syscall.PtraceRegs) (uintptr, error) {
return uintptr(rval), nil
}
-func dumpRegs(regs *syscall.PtraceRegs) string {
+func dumpRegs(regs *arch.Registers) string {
var m strings.Builder
fmt.Fprintf(&m, "Registers:\n")
@@ -121,6 +125,50 @@ func (t *thread) adjustInitRegsRip() {
}
// Pass the expected PPID to the child via X7 when creating stub process
-func initChildProcessPPID(initregs *syscall.PtraceRegs, ppid int32) {
+func initChildProcessPPID(initregs *arch.Registers, ppid int32) {
initregs.Regs[7] = uint64(ppid)
+ // R9 has to be set to 1 when creating stub process.
+ initregs.Regs[9] = 1
+}
+
+// patchSignalInfo patches the signal info to account for hitting the seccomp
+// filters from vsyscall emulation, specified below. We allow for SIGSYS as a
+// synchronous trap, but patch the structure to appear like a SIGSEGV with the
+// Rip as the faulting address.
+//
+// Note that this should only be called after verifying that the signalInfo has
+// been generated by the kernel.
+func patchSignalInfo(regs *arch.Registers, signalInfo *arch.SignalInfo) {
+ if linux.Signal(signalInfo.Signo) == linux.SIGSYS {
+ signalInfo.Signo = int32(linux.SIGSEGV)
+
+ // Unwind the kernel emulation, if any has occurred. A SIGSYS is delivered
+ // with the si_call_addr field pointing to the current RIP. This field
+ // aligns with the si_addr field for a SIGSEGV, so we don't need to touch
+ // anything there. We do need to unwind emulation however, so we set the
+ // instruction pointer to the faulting value, and "unpop" the stack.
+ regs.Pc = signalInfo.Addr()
+ regs.Sp -= 8
+ }
+}
+
+// Noop on arm64.
+//
+//go:nosplit
+func enableCpuidFault() {
+}
+
+// appendArchSeccompRules append architecture specific seccomp rules when creating BPF program.
+// Ref attachedThread() for more detail.
+func appendArchSeccompRules(rules []seccomp.RuleSet, defaultAction linux.BPFAction) []seccomp.RuleSet {
+ return rules
+}
+
+// probeSeccomp returns true if seccomp is run after ptrace notifications,
+// which is generally the case for kernel version >= 4.8.
+//
+// On arm64, the support of PTRACE_SYSEMU was added in the 5.3 kernel, so
+// probeSeccomp can always return true.
+func probeSeccomp() bool {
+ return true
}
diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go
index 3782d4332..2ce528601 100644
--- a/pkg/sentry/platform/ptrace/subprocess_linux.go
+++ b/pkg/sentry/platform/ptrace/subprocess_linux.go
@@ -29,75 +29,6 @@ import (
const syscallEvent syscall.Signal = 0x80
-// probeSeccomp returns true iff seccomp is run after ptrace notifications,
-// which is generally the case for kernel version >= 4.8. This check is dynamic
-// because kernels have be backported behavior.
-//
-// See createStub for more information.
-//
-// Precondition: the runtime OS thread must be locked.
-func probeSeccomp() bool {
- // Create a completely new, destroyable process.
- t, err := attachedThread(0, linux.SECCOMP_RET_ERRNO)
- if err != nil {
- panic(fmt.Sprintf("seccomp probe failed: %v", err))
- }
- defer t.destroy()
-
- // Set registers to the yield system call. This call is not allowed
- // by the filters specified in the attachThread function.
- regs := createSyscallRegs(&t.initRegs, syscall.SYS_SCHED_YIELD)
- if err := t.setRegs(&regs); err != nil {
- panic(fmt.Sprintf("ptrace set regs failed: %v", err))
- }
-
- for {
- // Attempt an emulation.
- if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_SYSEMU, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
- panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
- }
-
- sig := t.wait(stopped)
- if sig == (syscallEvent | syscall.SIGTRAP) {
- // Did the seccomp errno hook already run? This would
- // indicate that seccomp is first in line and we're
- // less than 4.8.
- if err := t.getRegs(&regs); err != nil {
- panic(fmt.Sprintf("ptrace get-regs failed: %v", err))
- }
- if _, err := syscallReturnValue(&regs); err == nil {
- // The seccomp errno mode ran first, and reset
- // the error in the registers.
- return false
- }
- // The seccomp hook did not run yet, and therefore it
- // is safe to use RET_KILL mode for dispatched calls.
- return true
- }
- }
-}
-
-// patchSignalInfo patches the signal info to account for hitting the seccomp
-// filters from vsyscall emulation, specified below. We allow for SIGSYS as a
-// synchronous trap, but patch the structure to appear like a SIGSEGV with the
-// Rip as the faulting address.
-//
-// Note that this should only be called after verifying that the signalInfo has
-// been generated by the kernel.
-func patchSignalInfo(regs *syscall.PtraceRegs, signalInfo *arch.SignalInfo) {
- if linux.Signal(signalInfo.Signo) == linux.SIGSYS {
- signalInfo.Signo = int32(linux.SIGSEGV)
-
- // Unwind the kernel emulation, if any has occurred. A SIGSYS is delivered
- // with the si_call_addr field pointing to the current RIP. This field
- // aligns with the si_addr field for a SIGSEGV, so we don't need to touch
- // anything there. We do need to unwind emulation however, so we set the
- // instruction pointer to the faulting value, and "unpop" the stack.
- regs.Rip = signalInfo.Addr()
- regs.Rsp -= 8
- }
-}
-
// createStub creates a fresh stub processes.
//
// Precondition: the runtime OS thread must be locked.
@@ -143,18 +74,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
// stub and all its children. This is used to create child stubs
// (below), so we must include the ability to fork, but otherwise lock
// down available calls only to what is needed.
- rules := []seccomp.RuleSet{
- // Rules for trapping vsyscall access.
- {
- Rules: seccomp.SyscallRules{
- syscall.SYS_GETTIMEOFDAY: {},
- syscall.SYS_TIME: {},
- 309: {}, // SYS_GETCPU.
- },
- Action: linux.SECCOMP_RET_TRAP,
- Vsyscall: true,
- },
- }
+ rules := []seccomp.RuleSet{}
if defaultAction != linux.SECCOMP_RET_ALLOW {
rules = append(rules, seccomp.RuleSet{
Rules: seccomp.SyscallRules{
@@ -173,10 +93,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
// For the initial process creation.
syscall.SYS_WAIT4: {},
- syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
- {seccomp.AllowValue(linux.ARCH_SET_CPUID), seccomp.AllowValue(0)},
- },
- syscall.SYS_EXIT: {},
+ syscall.SYS_EXIT: {},
// For the stub prctl dance (all).
syscall.SYS_PRCTL: []seccomp.Rule{
@@ -197,6 +114,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
Action: linux.SECCOMP_RET_ALLOW,
})
}
+ rules = appendArchSeccompRules(rules, defaultAction)
instrs, err := seccomp.BuildProgram(rules, defaultAction)
if err != nil {
return nil, err
@@ -267,9 +185,8 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
syscall.RawSyscall(syscall.SYS_EXIT, uintptr(errno), 0, 0)
}
- // Enable cpuid-faulting; this may fail on older kernels or hardware,
- // so we just disregard the result. Host CPUID will be enabled.
- syscall.RawSyscall6(syscall.SYS_ARCH_PRCTL, linux.ARCH_SET_CPUID, 0, 0, 0, 0, 0)
+ // Enable cpuid-faulting.
+ enableCpuidFault()
// Call the stub; should not return.
stubCall(stubStart, ppid)
diff --git a/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go
index de6783fb0..245b20722 100644
--- a/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go
+++ b/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go
@@ -18,13 +18,14 @@
package ptrace
import (
- "sync"
"sync/atomic"
"syscall"
"unsafe"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/hostcpu"
+ "gvisor.dev/gvisor/pkg/sync"
)
// maskPool contains reusable CPU masks for setting affinity. Unfortunately,
@@ -49,20 +50,6 @@ func unmaskAllSignals() syscall.Errno {
return errno
}
-// getCPU gets the current CPU.
-//
-// Precondition: the current runtime thread should be locked.
-func getCPU() (uint32, error) {
- var cpu uintptr
- if _, _, errno := syscall.RawSyscall(
- unix.SYS_GETCPU,
- uintptr(unsafe.Pointer(&cpu)),
- 0, 0); errno != 0 {
- return 0, errno
- }
- return uint32(cpu), nil
-}
-
// setCPU sets the CPU affinity.
func (t *thread) setCPU(cpu uint32) error {
mask := maskPool.Get().([]uintptr)
@@ -93,10 +80,8 @@ func (t *thread) setCPU(cpu uint32) error {
//
// Precondition: the current runtime thread should be locked.
func (t *thread) bind() {
- currentCPU, err := getCPU()
- if err != nil {
- return
- }
+ currentCPU := hostcpu.GetCPU()
+
if oldCPU := atomic.SwapUint32(&t.cpu, currentCPU); oldCPU != currentCPU {
// Set the affinity on the thread and save the CPU for next
// round; we don't expect CPUs to bounce around too frequently.
diff --git a/pkg/sentry/platform/ptrace/subprocess_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_unsafe.go
index b80a3604d..2ae6b9f9d 100644
--- a/pkg/sentry/platform/ptrace/subprocess_unsafe.go
+++ b/pkg/sentry/platform/ptrace/subprocess_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.12
-// +build !go1.14
+// +build !go1.15
// Check go:linkname function signatures when updating Go version.