diff options
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r-- | pkg/sentry/kernel/kernel_abi_autogen_unsafe.go | 118 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel_state_autogen.go | 3 | ||||
-rw-r--r-- | pkg/sentry/kernel/seccomp.go | 46 | ||||
-rw-r--r-- | pkg/sentry/kernel/signalfd/signalfd.go | 14 | ||||
-rw-r--r-- | pkg/sentry/kernel/vdso.go | 19 |
5 files changed, 164 insertions, 36 deletions
diff --git a/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go index eaa2ebdb3..9ac2a552a 100644 --- a/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go +++ b/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go @@ -15,6 +15,7 @@ import ( // Marshallable types used by this file. var _ marshal.Marshallable = (*ThreadID)(nil) +var _ marshal.Marshallable = (*vdsoParams)(nil) // SizeBytes implements marshal.Marshallable.SizeBytes. //go:nosplit @@ -105,3 +106,120 @@ func (t *ThreadID) WriteTo(w io.Writer) (int64, error) { return int64(length), err } +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (v *vdsoParams) SizeBytes() int { + return 64 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (v *vdsoParams) MarshalBytes(dst []byte) { + usermem.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicReady)) + dst = dst[8:] + usermem.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicBaseCycles)) + dst = dst[8:] + usermem.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicBaseRef)) + dst = dst[8:] + usermem.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicFrequency)) + dst = dst[8:] + usermem.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeReady)) + dst = dst[8:] + usermem.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeBaseCycles)) + dst = dst[8:] + usermem.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeBaseRef)) + dst = dst[8:] + usermem.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeFrequency)) + dst = dst[8:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (v *vdsoParams) UnmarshalBytes(src []byte) { + v.monotonicReady = uint64(usermem.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.monotonicBaseCycles = int64(usermem.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.monotonicBaseRef = int64(usermem.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.monotonicFrequency = uint64(usermem.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeReady = uint64(usermem.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeBaseCycles = int64(usermem.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeBaseRef = int64(usermem.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeFrequency = uint64(usermem.ByteOrder.Uint64(src[:8])) + src = src[8:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (v *vdsoParams) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (v *vdsoParams) MarshalUnsafe(dst []byte) { + safecopy.CopyIn(dst, unsafe.Pointer(v)) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (v *vdsoParams) UnmarshalUnsafe(src []byte) { + safecopy.CopyOut(unsafe.Pointer(v), src) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (v *vdsoParams) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v))) + hdr.Len = v.SizeBytes() + hdr.Cap = v.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that v + // must live until the use above. + runtime.KeepAlive(v) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (v *vdsoParams) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) { + return v.CopyOutN(cc, addr, v.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (v *vdsoParams) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v))) + hdr.Len = v.SizeBytes() + hdr.Cap = v.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that v + // must live until the use above. + runtime.KeepAlive(v) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (v *vdsoParams) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v))) + hdr.Len = v.SizeBytes() + hdr.Cap = v.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that v + // must live until the use above. + runtime.KeepAlive(v) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go index f20800960..7ff5e1eb9 100644 --- a/pkg/sentry/kernel/kernel_state_autogen.go +++ b/pkg/sentry/kernel/kernel_state_autogen.go @@ -2259,6 +2259,7 @@ func (x *VDSOParamPage) StateFields() []string { "mfp", "fr", "seq", + "copyScratchBuffer", } } @@ -2269,6 +2270,7 @@ func (x *VDSOParamPage) StateSave(m state.Sink) { m.Save(0, &x.mfp) m.Save(1, &x.fr) m.Save(2, &x.seq) + m.Save(3, &x.copyScratchBuffer) } func (x *VDSOParamPage) afterLoad() {} @@ -2277,6 +2279,7 @@ func (x *VDSOParamPage) StateLoad(m state.Source) { m.Load(0, &x.mfp) m.Load(1, &x.fr) m.Load(2, &x.seq) + m.Load(3, &x.copyScratchBuffer) } func init() { diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go index c38c5a40c..387edfa91 100644 --- a/pkg/sentry/kernel/seccomp.go +++ b/pkg/sentry/kernel/seccomp.go @@ -18,7 +18,6 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/syserror" @@ -27,25 +26,18 @@ import ( const maxSyscallFilterInstructions = 1 << 15 -// seccompData is equivalent to struct seccomp_data, which contains the data -// passed to seccomp-bpf filters. -type seccompData struct { - // nr is the system call number. - nr int32 - - // arch is an AUDIT_ARCH_* value indicating the system call convention. - arch uint32 - - // instructionPointer is the value of the instruction pointer at the time - // of the system call. - instructionPointer uint64 - - // args contains the first 6 system call arguments. - args [6]uint64 -} - -func (d *seccompData) asBPFInput() bpf.Input { - return bpf.InputBytes{binary.Marshal(nil, usermem.ByteOrder, d), usermem.ByteOrder} +// dataAsBPFInput returns a serialized BPF program, only valid on the current task +// goroutine. +// +// Note: this is called for every syscall, which is a very hot path. +func dataAsBPFInput(t *Task, d *linux.SeccompData) bpf.Input { + buf := t.CopyScratchBuffer(d.SizeBytes()) + d.MarshalUnsafe(buf) + return bpf.InputBytes{ + Data: buf, + // Go-marshal always uses the native byte order. + Order: usermem.ByteOrder, + } } func seccompSiginfo(t *Task, errno, sysno int32, ip usermem.Addr) *arch.SignalInfo { @@ -112,20 +104,20 @@ func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip u } func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, ip usermem.Addr) uint32 { - data := seccompData{ - nr: sysno, - arch: t.tc.st.AuditNumber, - instructionPointer: uint64(ip), + data := linux.SeccompData{ + Nr: sysno, + Arch: t.tc.st.AuditNumber, + InstructionPointer: uint64(ip), } // data.args is []uint64 and args is []arch.SyscallArgument (uintptr), so // we can't do any slicing tricks or even use copy/append here. for i, arg := range args { - if i >= len(data.args) { + if i >= len(data.Args) { break } - data.args[i] = arg.Uint64() + data.Args[i] = arg.Uint64() } - input := data.asBPFInput() + input := dataAsBPFInput(t, &data) ret := uint32(linux.SECCOMP_RET_ALLOW) f := t.syscallFilters.Load() diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go index b07e1c1bd..78f718cfe 100644 --- a/pkg/sentry/kernel/signalfd/signalfd.go +++ b/pkg/sentry/kernel/signalfd/signalfd.go @@ -17,7 +17,6 @@ package signalfd import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" @@ -103,8 +102,7 @@ func (s *SignalOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS } // Copy out the signal info using the specified format. - var buf [128]byte - binary.Marshal(buf[:0], usermem.ByteOrder, &linux.SignalfdSiginfo{ + infoNative := linux.SignalfdSiginfo{ Signo: uint32(info.Signo), Errno: info.Errno, Code: info.Code, @@ -113,9 +111,13 @@ func (s *SignalOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS Status: info.Status(), Overrun: uint32(info.Overrun()), Addr: info.Addr(), - }) - n, err := dst.CopyOut(ctx, buf[:]) - return int64(n), err + } + n, err := infoNative.WriteTo(dst.Writer(ctx)) + if err == usermem.ErrEndOfIOSequence { + // Partial copy-out ok. + err = nil + } + return n, err } // Readiness implements waiter.Waitable.Readiness. diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go index e44a139b3..9bc452e67 100644 --- a/pkg/sentry/kernel/vdso.go +++ b/pkg/sentry/kernel/vdso.go @@ -17,7 +17,6 @@ package kernel import ( "fmt" - "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -28,6 +27,8 @@ import ( // // They are exposed to the VDSO via a parameter page managed by VDSOParamPage, // which also includes a sequence counter. +// +// +marshal type vdsoParams struct { monotonicReady uint64 monotonicBaseCycles int64 @@ -68,6 +69,13 @@ type VDSOParamPage struct { // checked in state_test_util tests, causing this field to change across // save / restore. seq uint64 + + // copyScratchBuffer is a temporary buffer used to marshal the params before + // copying it to the real parameter page. The parameter page is typically + // updated at a moderate frequency of ~O(seconds) throughout the lifetime of + // the sentry, so reusing this buffer is a good tradeoff between memory + // usage and the cost of allocation. + copyScratchBuffer []byte } // NewVDSOParamPage returns a VDSOParamPage. @@ -79,7 +87,11 @@ type VDSOParamPage struct { // * VDSOParamPage must be the only writer to fr. // * mfp.MemoryFile().MapInternal(fr) must return a single safemem.Block. func NewVDSOParamPage(mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *VDSOParamPage { - return &VDSOParamPage{mfp: mfp, fr: fr} + return &VDSOParamPage{ + mfp: mfp, + fr: fr, + copyScratchBuffer: make([]byte, (*vdsoParams)(nil).SizeBytes()), + } } // access returns a mapping of the param page. @@ -133,7 +145,8 @@ func (v *VDSOParamPage) Write(f func() vdsoParams) error { // Get the new params. p := f() - buf := binary.Marshal(nil, usermem.ByteOrder, p) + buf := v.copyScratchBuffer[:p.SizeBytes()] + p.MarshalUnsafe(buf) // Skip the sequence counter. if _, err := safemem.Copy(paramPage.DropFirst(8), safemem.BlockFromSafeSlice(buf)); err != nil { |