summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/kernel
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2020-10-02 19:27:31 +0000
committergVisor bot <gvisor-bot@google.com>2020-10-02 19:27:31 +0000
commita0b63cf9de4a2ccac12b5697e7e0f749656da396 (patch)
treeec187a7c2bbb4b2ea1bb2de67e0ed6d26ed62722 /pkg/sentry/kernel
parenta0712b6e67d589768f80acbef2776b06962ade27 (diff)
parent4f462b0ed9912fa19b3a3eab6d2f08a98d364574 (diff)
Merge release-20200921.0-102-g4f462b0ed (automated)
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r--pkg/sentry/kernel/kernel_abi_autogen_unsafe.go118
-rw-r--r--pkg/sentry/kernel/kernel_state_autogen.go3
-rw-r--r--pkg/sentry/kernel/seccomp.go46
-rw-r--r--pkg/sentry/kernel/signalfd/signalfd.go14
-rw-r--r--pkg/sentry/kernel/vdso.go19
5 files changed, 164 insertions, 36 deletions
diff --git a/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go
index eaa2ebdb3..9ac2a552a 100644
--- a/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go
+++ b/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go
@@ -15,6 +15,7 @@ import (
// Marshallable types used by this file.
var _ marshal.Marshallable = (*ThreadID)(nil)
+var _ marshal.Marshallable = (*vdsoParams)(nil)
// SizeBytes implements marshal.Marshallable.SizeBytes.
//go:nosplit
@@ -105,3 +106,120 @@ func (t *ThreadID) WriteTo(w io.Writer) (int64, error) {
return int64(length), err
}
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (v *vdsoParams) SizeBytes() int {
+ return 64
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (v *vdsoParams) MarshalBytes(dst []byte) {
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicReady))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicBaseCycles))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicBaseRef))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicFrequency))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeReady))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeBaseCycles))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeBaseRef))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeFrequency))
+ dst = dst[8:]
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (v *vdsoParams) UnmarshalBytes(src []byte) {
+ v.monotonicReady = uint64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ v.monotonicBaseCycles = int64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ v.monotonicBaseRef = int64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ v.monotonicFrequency = uint64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ v.realtimeReady = uint64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ v.realtimeBaseCycles = int64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ v.realtimeBaseRef = int64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ v.realtimeFrequency = uint64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+}
+
+// Packed implements marshal.Marshallable.Packed.
+//go:nosplit
+func (v *vdsoParams) Packed() bool {
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (v *vdsoParams) MarshalUnsafe(dst []byte) {
+ safecopy.CopyIn(dst, unsafe.Pointer(v))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (v *vdsoParams) UnmarshalUnsafe(src []byte) {
+ safecopy.CopyOut(unsafe.Pointer(v), src)
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN.
+//go:nosplit
+func (v *vdsoParams) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v)))
+ hdr.Len = v.SizeBytes()
+ hdr.Cap = v.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that v
+ // must live until the use above.
+ runtime.KeepAlive(v) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+//go:nosplit
+func (v *vdsoParams) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
+ return v.CopyOutN(cc, addr, v.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn.
+//go:nosplit
+func (v *vdsoParams) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v)))
+ hdr.Len = v.SizeBytes()
+ hdr.Cap = v.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that v
+ // must live until the use above.
+ runtime.KeepAlive(v) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo.
+func (v *vdsoParams) WriteTo(writer io.Writer) (int64, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v)))
+ hdr.Len = v.SizeBytes()
+ hdr.Cap = v.SizeBytes()
+
+ length, err := writer.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that v
+ // must live until the use above.
+ runtime.KeepAlive(v) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go
index f20800960..7ff5e1eb9 100644
--- a/pkg/sentry/kernel/kernel_state_autogen.go
+++ b/pkg/sentry/kernel/kernel_state_autogen.go
@@ -2259,6 +2259,7 @@ func (x *VDSOParamPage) StateFields() []string {
"mfp",
"fr",
"seq",
+ "copyScratchBuffer",
}
}
@@ -2269,6 +2270,7 @@ func (x *VDSOParamPage) StateSave(m state.Sink) {
m.Save(0, &x.mfp)
m.Save(1, &x.fr)
m.Save(2, &x.seq)
+ m.Save(3, &x.copyScratchBuffer)
}
func (x *VDSOParamPage) afterLoad() {}
@@ -2277,6 +2279,7 @@ func (x *VDSOParamPage) StateLoad(m state.Source) {
m.Load(0, &x.mfp)
m.Load(1, &x.fr)
m.Load(2, &x.seq)
+ m.Load(3, &x.copyScratchBuffer)
}
func init() {
diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go
index c38c5a40c..387edfa91 100644
--- a/pkg/sentry/kernel/seccomp.go
+++ b/pkg/sentry/kernel/seccomp.go
@@ -18,7 +18,6 @@ import (
"syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/bpf"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/syserror"
@@ -27,25 +26,18 @@ import (
const maxSyscallFilterInstructions = 1 << 15
-// seccompData is equivalent to struct seccomp_data, which contains the data
-// passed to seccomp-bpf filters.
-type seccompData struct {
- // nr is the system call number.
- nr int32
-
- // arch is an AUDIT_ARCH_* value indicating the system call convention.
- arch uint32
-
- // instructionPointer is the value of the instruction pointer at the time
- // of the system call.
- instructionPointer uint64
-
- // args contains the first 6 system call arguments.
- args [6]uint64
-}
-
-func (d *seccompData) asBPFInput() bpf.Input {
- return bpf.InputBytes{binary.Marshal(nil, usermem.ByteOrder, d), usermem.ByteOrder}
+// dataAsBPFInput returns a serialized BPF program, only valid on the current task
+// goroutine.
+//
+// Note: this is called for every syscall, which is a very hot path.
+func dataAsBPFInput(t *Task, d *linux.SeccompData) bpf.Input {
+ buf := t.CopyScratchBuffer(d.SizeBytes())
+ d.MarshalUnsafe(buf)
+ return bpf.InputBytes{
+ Data: buf,
+ // Go-marshal always uses the native byte order.
+ Order: usermem.ByteOrder,
+ }
}
func seccompSiginfo(t *Task, errno, sysno int32, ip usermem.Addr) *arch.SignalInfo {
@@ -112,20 +104,20 @@ func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip u
}
func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, ip usermem.Addr) uint32 {
- data := seccompData{
- nr: sysno,
- arch: t.tc.st.AuditNumber,
- instructionPointer: uint64(ip),
+ data := linux.SeccompData{
+ Nr: sysno,
+ Arch: t.tc.st.AuditNumber,
+ InstructionPointer: uint64(ip),
}
// data.args is []uint64 and args is []arch.SyscallArgument (uintptr), so
// we can't do any slicing tricks or even use copy/append here.
for i, arg := range args {
- if i >= len(data.args) {
+ if i >= len(data.Args) {
break
}
- data.args[i] = arg.Uint64()
+ data.Args[i] = arg.Uint64()
}
- input := data.asBPFInput()
+ input := dataAsBPFInput(t, &data)
ret := uint32(linux.SECCOMP_RET_ALLOW)
f := t.syscallFilters.Load()
diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go
index b07e1c1bd..78f718cfe 100644
--- a/pkg/sentry/kernel/signalfd/signalfd.go
+++ b/pkg/sentry/kernel/signalfd/signalfd.go
@@ -17,7 +17,6 @@ package signalfd
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/anon"
@@ -103,8 +102,7 @@ func (s *SignalOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS
}
// Copy out the signal info using the specified format.
- var buf [128]byte
- binary.Marshal(buf[:0], usermem.ByteOrder, &linux.SignalfdSiginfo{
+ infoNative := linux.SignalfdSiginfo{
Signo: uint32(info.Signo),
Errno: info.Errno,
Code: info.Code,
@@ -113,9 +111,13 @@ func (s *SignalOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS
Status: info.Status(),
Overrun: uint32(info.Overrun()),
Addr: info.Addr(),
- })
- n, err := dst.CopyOut(ctx, buf[:])
- return int64(n), err
+ }
+ n, err := infoNative.WriteTo(dst.Writer(ctx))
+ if err == usermem.ErrEndOfIOSequence {
+ // Partial copy-out ok.
+ err = nil
+ }
+ return n, err
}
// Readiness implements waiter.Waitable.Readiness.
diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go
index e44a139b3..9bc452e67 100644
--- a/pkg/sentry/kernel/vdso.go
+++ b/pkg/sentry/kernel/vdso.go
@@ -17,7 +17,6 @@ package kernel
import (
"fmt"
- "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
@@ -28,6 +27,8 @@ import (
//
// They are exposed to the VDSO via a parameter page managed by VDSOParamPage,
// which also includes a sequence counter.
+//
+// +marshal
type vdsoParams struct {
monotonicReady uint64
monotonicBaseCycles int64
@@ -68,6 +69,13 @@ type VDSOParamPage struct {
// checked in state_test_util tests, causing this field to change across
// save / restore.
seq uint64
+
+ // copyScratchBuffer is a temporary buffer used to marshal the params before
+ // copying it to the real parameter page. The parameter page is typically
+ // updated at a moderate frequency of ~O(seconds) throughout the lifetime of
+ // the sentry, so reusing this buffer is a good tradeoff between memory
+ // usage and the cost of allocation.
+ copyScratchBuffer []byte
}
// NewVDSOParamPage returns a VDSOParamPage.
@@ -79,7 +87,11 @@ type VDSOParamPage struct {
// * VDSOParamPage must be the only writer to fr.
// * mfp.MemoryFile().MapInternal(fr) must return a single safemem.Block.
func NewVDSOParamPage(mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *VDSOParamPage {
- return &VDSOParamPage{mfp: mfp, fr: fr}
+ return &VDSOParamPage{
+ mfp: mfp,
+ fr: fr,
+ copyScratchBuffer: make([]byte, (*vdsoParams)(nil).SizeBytes()),
+ }
}
// access returns a mapping of the param page.
@@ -133,7 +145,8 @@ func (v *VDSOParamPage) Write(f func() vdsoParams) error {
// Get the new params.
p := f()
- buf := binary.Marshal(nil, usermem.ByteOrder, p)
+ buf := v.copyScratchBuffer[:p.SizeBytes()]
+ p.MarshalUnsafe(buf)
// Skip the sequence counter.
if _, err := safemem.Copy(paramPage.DropFirst(8), safemem.BlockFromSafeSlice(buf)); err != nil {