diff options
Diffstat (limited to 'pkg/sentry')
460 files changed, 58047 insertions, 26229 deletions
diff --git a/pkg/sentry/BUILD b/pkg/sentry/BUILD deleted file mode 100644 index e759dc36f..000000000 --- a/pkg/sentry/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -package(licenses = ["notice"]) - -# The "internal" package_group should be used as much as possible by packages -# that should remain Sentry-internal (i.e. not be exposed directly to command -# line tooling or APIs). -package_group( - name = "internal", - packages = [ - "//pkg/sentry/...", - "//runsc/...", - # Code generated by go_marshal relies on go_marshal libraries. - "//tools/go_marshal/...", - ], -) diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD deleted file mode 100644 index e0dbc436d..000000000 --- a/pkg/sentry/arch/BUILD +++ /dev/null @@ -1,47 +0,0 @@ -load("//tools:defs.bzl", "go_library", "proto_library") - -package(licenses = ["notice"]) - -go_library( - name = "arch", - srcs = [ - "aligned.go", - "arch.go", - "arch_aarch64.go", - "arch_amd64.go", - "arch_arm64.go", - "arch_state_x86.go", - "arch_x86.go", - "arch_x86_impl.go", - "auxv.go", - "signal_amd64.go", - "signal_arm64.go", - "stack.go", - "stack_unsafe.go", - "syscalls_amd64.go", - "syscalls_arm64.go", - ], - marshal = True, - visibility = ["//:sandbox"], - deps = [ - ":registers_go_proto", - "//pkg/abi/linux", - "//pkg/context", - "//pkg/cpuid", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/sentry/arch/fpu", - "//pkg/sentry/limits", - "//pkg/usermem", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -proto_library( - name = "registers", - srcs = ["registers.proto"], - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/arch/arch_aarch64_abi_autogen_unsafe.go b/pkg/sentry/arch/arch_aarch64_abi_autogen_unsafe.go new file mode 100644 index 000000000..0a1eae844 --- /dev/null +++ b/pkg/sentry/arch/arch_aarch64_abi_autogen_unsafe.go @@ -0,0 +1,16 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build arm64 +// +build arm64 + +package arch + +import ( +) + diff --git a/pkg/sentry/arch/arch_aarch64_state_autogen.go b/pkg/sentry/arch/arch_aarch64_state_autogen.go new file mode 100644 index 000000000..6faaa9f08 --- /dev/null +++ b/pkg/sentry/arch/arch_aarch64_state_autogen.go @@ -0,0 +1,77 @@ +// automatically generated by stateify. + +//go:build arm64 +// +build arm64 + +package arch + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *Registers) StateTypeName() string { + return "pkg/sentry/arch.Registers" +} + +func (r *Registers) StateFields() []string { + return []string{ + "PtraceRegs", + "TPIDR_EL0", + } +} + +func (r *Registers) beforeSave() {} + +// +checklocksignore +func (r *Registers) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.PtraceRegs) + stateSinkObject.Save(1, &r.TPIDR_EL0) +} + +func (r *Registers) afterLoad() {} + +// +checklocksignore +func (r *Registers) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.PtraceRegs) + stateSourceObject.Load(1, &r.TPIDR_EL0) +} + +func (s *State) StateTypeName() string { + return "pkg/sentry/arch.State" +} + +func (s *State) StateFields() []string { + return []string{ + "Regs", + "fpState", + "FeatureSet", + "OrigR0", + } +} + +func (s *State) beforeSave() {} + +// +checklocksignore +func (s *State) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Regs) + stateSinkObject.Save(1, &s.fpState) + stateSinkObject.Save(2, &s.FeatureSet) + stateSinkObject.Save(3, &s.OrigR0) +} + +func (s *State) afterLoad() {} + +// +checklocksignore +func (s *State) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Regs) + stateSourceObject.LoadWait(1, &s.fpState) + stateSourceObject.Load(2, &s.FeatureSet) + stateSourceObject.Load(3, &s.OrigR0) +} + +func init() { + state.Register((*Registers)(nil)) + state.Register((*State)(nil)) +} diff --git a/pkg/sentry/arch/arch_abi_autogen_unsafe.go b/pkg/sentry/arch/arch_abi_autogen_unsafe.go new file mode 100644 index 000000000..2167ed9c9 --- /dev/null +++ b/pkg/sentry/arch/arch_abi_autogen_unsafe.go @@ -0,0 +1,7 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package arch + +import ( +) + diff --git a/pkg/sentry/arch/arch_amd64_abi_autogen_unsafe.go b/pkg/sentry/arch/arch_amd64_abi_autogen_unsafe.go new file mode 100644 index 000000000..dd850b1b8 --- /dev/null +++ b/pkg/sentry/arch/arch_amd64_abi_autogen_unsafe.go @@ -0,0 +1,411 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build amd64 && amd64 && amd64 +// +build amd64,amd64,amd64 + +package arch + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal" + "io" + "reflect" + "runtime" + "unsafe" +) + +// Marshallable types used by this file. +var _ marshal.Marshallable = (*SignalContext64)(nil) +var _ marshal.Marshallable = (*UContext64)(nil) +var _ marshal.Marshallable = (*linux.SignalSet)(nil) +var _ marshal.Marshallable = (*linux.SignalStack)(nil) + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (s *SignalContext64) SizeBytes() int { + return 184 + + (*linux.SignalSet)(nil).SizeBytes() + + 8*8 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (s *SignalContext64) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.R8)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.R9)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.R10)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.R11)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.R12)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.R13)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.R14)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.R15)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Rdi)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Rsi)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Rbp)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Rbx)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Rdx)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Rax)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Rcx)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Rsp)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Rip)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Eflags)) + dst = dst[8:] + hostarch.ByteOrder.PutUint16(dst[:2], uint16(s.Cs)) + dst = dst[2:] + hostarch.ByteOrder.PutUint16(dst[:2], uint16(s.Gs)) + dst = dst[2:] + hostarch.ByteOrder.PutUint16(dst[:2], uint16(s.Fs)) + dst = dst[2:] + hostarch.ByteOrder.PutUint16(dst[:2], uint16(s.Ss)) + dst = dst[2:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Err)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Trapno)) + dst = dst[8:] + s.Oldmask.MarshalBytes(dst[:s.Oldmask.SizeBytes()]) + dst = dst[s.Oldmask.SizeBytes():] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Cr2)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Fpstate)) + dst = dst[8:] + for idx := 0; idx < 8; idx++ { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Reserved[idx])) + dst = dst[8:] + } +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (s *SignalContext64) UnmarshalBytes(src []byte) { + s.R8 = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.R9 = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.R10 = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.R11 = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.R12 = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.R13 = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.R14 = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.R15 = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Rdi = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Rsi = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Rbp = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Rbx = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Rdx = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Rax = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Rcx = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Rsp = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Rip = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Eflags = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Cs = uint16(hostarch.ByteOrder.Uint16(src[:2])) + src = src[2:] + s.Gs = uint16(hostarch.ByteOrder.Uint16(src[:2])) + src = src[2:] + s.Fs = uint16(hostarch.ByteOrder.Uint16(src[:2])) + src = src[2:] + s.Ss = uint16(hostarch.ByteOrder.Uint16(src[:2])) + src = src[2:] + s.Err = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Trapno = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Oldmask.UnmarshalBytes(src[:s.Oldmask.SizeBytes()]) + src = src[s.Oldmask.SizeBytes():] + s.Cr2 = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Fpstate = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + for idx := 0; idx < 8; idx++ { + s.Reserved[idx] = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + } +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (s *SignalContext64) Packed() bool { + return s.Oldmask.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (s *SignalContext64) MarshalUnsafe(dst []byte) { + if s.Oldmask.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(s), uintptr(s.SizeBytes())) + } else { + // Type SignalContext64 doesn't have a packed layout in memory, fallback to MarshalBytes. + s.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (s *SignalContext64) UnmarshalUnsafe(src []byte) { + if s.Oldmask.Packed() { + gohacks.Memmove(unsafe.Pointer(s), unsafe.Pointer(&src[0]), uintptr(s.SizeBytes())) + } else { + // Type SignalContext64 doesn't have a packed layout in memory, fallback to UnmarshalBytes. + s.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (s *SignalContext64) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !s.Oldmask.Packed() { + // Type SignalContext64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. + s.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (s *SignalContext64) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return s.CopyOutN(cc, addr, s.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (s *SignalContext64) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !s.Oldmask.Packed() { + // Type SignalContext64 doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + s.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (s *SignalContext64) WriteTo(writer io.Writer) (int64, error) { + if !s.Oldmask.Packed() { + // Type SignalContext64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, s.SizeBytes()) + s.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (u *UContext64) SizeBytes() int { + return 16 + + (*linux.SignalStack)(nil).SizeBytes() + + (*SignalContext64)(nil).SizeBytes() + + (*linux.SignalSet)(nil).SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (u *UContext64) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(u.Flags)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(u.Link)) + dst = dst[8:] + u.Stack.MarshalBytes(dst[:u.Stack.SizeBytes()]) + dst = dst[u.Stack.SizeBytes():] + u.MContext.MarshalBytes(dst[:u.MContext.SizeBytes()]) + dst = dst[u.MContext.SizeBytes():] + u.Sigset.MarshalBytes(dst[:u.Sigset.SizeBytes()]) + dst = dst[u.Sigset.SizeBytes():] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (u *UContext64) UnmarshalBytes(src []byte) { + u.Flags = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + u.Link = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + u.Stack.UnmarshalBytes(src[:u.Stack.SizeBytes()]) + src = src[u.Stack.SizeBytes():] + u.MContext.UnmarshalBytes(src[:u.MContext.SizeBytes()]) + src = src[u.MContext.SizeBytes():] + u.Sigset.UnmarshalBytes(src[:u.Sigset.SizeBytes()]) + src = src[u.Sigset.SizeBytes():] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (u *UContext64) Packed() bool { + return u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (u *UContext64) MarshalUnsafe(dst []byte) { + if u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(u), uintptr(u.SizeBytes())) + } else { + // Type UContext64 doesn't have a packed layout in memory, fallback to MarshalBytes. + u.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (u *UContext64) UnmarshalUnsafe(src []byte) { + if u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() { + gohacks.Memmove(unsafe.Pointer(u), unsafe.Pointer(&src[0]), uintptr(u.SizeBytes())) + } else { + // Type UContext64 doesn't have a packed layout in memory, fallback to UnmarshalBytes. + u.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (u *UContext64) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() { + // Type UContext64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(u.SizeBytes()) // escapes: okay. + u.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(u))) + hdr.Len = u.SizeBytes() + hdr.Cap = u.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that u + // must live until the use above. + runtime.KeepAlive(u) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (u *UContext64) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return u.CopyOutN(cc, addr, u.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (u *UContext64) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() { + // Type UContext64 doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(u.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + u.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(u))) + hdr.Len = u.SizeBytes() + hdr.Cap = u.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that u + // must live until the use above. + runtime.KeepAlive(u) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (u *UContext64) WriteTo(writer io.Writer) (int64, error) { + if !u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() { + // Type UContext64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, u.SizeBytes()) + u.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(u))) + hdr.Len = u.SizeBytes() + hdr.Cap = u.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that u + // must live until the use above. + runtime.KeepAlive(u) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/sentry/arch/arch_amd64_state_autogen.go b/pkg/sentry/arch/arch_amd64_state_autogen.go new file mode 100644 index 000000000..501572f2c --- /dev/null +++ b/pkg/sentry/arch/arch_amd64_state_autogen.go @@ -0,0 +1,42 @@ +// automatically generated by stateify. + +//go:build amd64 && amd64 && amd64 +// +build amd64,amd64,amd64 + +package arch + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (c *context64) StateTypeName() string { + return "pkg/sentry/arch.context64" +} + +func (c *context64) StateFields() []string { + return []string{ + "State", + "sigFPState", + } +} + +func (c *context64) beforeSave() {} + +// +checklocksignore +func (c *context64) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.State) + stateSinkObject.Save(1, &c.sigFPState) +} + +func (c *context64) afterLoad() {} + +// +checklocksignore +func (c *context64) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.State) + stateSourceObject.Load(1, &c.sigFPState) +} + +func init() { + state.Register((*context64)(nil)) +} diff --git a/pkg/sentry/arch/arch_arm64_abi_autogen_unsafe.go b/pkg/sentry/arch/arch_arm64_abi_autogen_unsafe.go new file mode 100644 index 000000000..52ca0b185 --- /dev/null +++ b/pkg/sentry/arch/arch_arm64_abi_autogen_unsafe.go @@ -0,0 +1,587 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build arm64 && arm64 && arm64 +// +build arm64,arm64,arm64 + +package arch + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal" + "io" + "reflect" + "runtime" + "unsafe" +) + +// Marshallable types used by this file. +var _ marshal.Marshallable = (*FpsimdContext)(nil) +var _ marshal.Marshallable = (*SignalContext64)(nil) +var _ marshal.Marshallable = (*UContext64)(nil) +var _ marshal.Marshallable = (*aarch64Ctx)(nil) +var _ marshal.Marshallable = (*linux.SignalSet)(nil) +var _ marshal.Marshallable = (*linux.SignalStack)(nil) + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (f *FpsimdContext) SizeBytes() int { + return 8 + + (*aarch64Ctx)(nil).SizeBytes() + + 8*64 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (f *FpsimdContext) MarshalBytes(dst []byte) { + f.Head.MarshalBytes(dst[:f.Head.SizeBytes()]) + dst = dst[f.Head.SizeBytes():] + hostarch.ByteOrder.PutUint32(dst[:4], uint32(f.Fpsr)) + dst = dst[4:] + hostarch.ByteOrder.PutUint32(dst[:4], uint32(f.Fpcr)) + dst = dst[4:] + for idx := 0; idx < 64; idx++ { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(f.Vregs[idx])) + dst = dst[8:] + } +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (f *FpsimdContext) UnmarshalBytes(src []byte) { + f.Head.UnmarshalBytes(src[:f.Head.SizeBytes()]) + src = src[f.Head.SizeBytes():] + f.Fpsr = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + f.Fpcr = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + for idx := 0; idx < 64; idx++ { + f.Vregs[idx] = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + } +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (f *FpsimdContext) Packed() bool { + return f.Head.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (f *FpsimdContext) MarshalUnsafe(dst []byte) { + if f.Head.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(f), uintptr(f.SizeBytes())) + } else { + // Type FpsimdContext doesn't have a packed layout in memory, fallback to MarshalBytes. + f.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (f *FpsimdContext) UnmarshalUnsafe(src []byte) { + if f.Head.Packed() { + gohacks.Memmove(unsafe.Pointer(f), unsafe.Pointer(&src[0]), uintptr(f.SizeBytes())) + } else { + // Type FpsimdContext doesn't have a packed layout in memory, fallback to UnmarshalBytes. + f.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (f *FpsimdContext) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !f.Head.Packed() { + // Type FpsimdContext doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(f.SizeBytes()) // escapes: okay. + f.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f))) + hdr.Len = f.SizeBytes() + hdr.Cap = f.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that f + // must live until the use above. + runtime.KeepAlive(f) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (f *FpsimdContext) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return f.CopyOutN(cc, addr, f.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (f *FpsimdContext) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !f.Head.Packed() { + // Type FpsimdContext doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(f.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + f.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f))) + hdr.Len = f.SizeBytes() + hdr.Cap = f.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that f + // must live until the use above. + runtime.KeepAlive(f) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (f *FpsimdContext) WriteTo(writer io.Writer) (int64, error) { + if !f.Head.Packed() { + // Type FpsimdContext doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, f.SizeBytes()) + f.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f))) + hdr.Len = f.SizeBytes() + hdr.Cap = f.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that f + // must live until the use above. + runtime.KeepAlive(f) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (s *SignalContext64) SizeBytes() int { + return 32 + + 8*31 + + 1*8 + + (*FpsimdContext)(nil).SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (s *SignalContext64) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.FaultAddr)) + dst = dst[8:] + for idx := 0; idx < 31; idx++ { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Regs[idx])) + dst = dst[8:] + } + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Sp)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Pc)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.Pstate)) + dst = dst[8:] + for idx := 0; idx < 8; idx++ { + dst[0] = byte(s._pad[idx]) + dst = dst[1:] + } + s.Fpsimd64.MarshalBytes(dst[:s.Fpsimd64.SizeBytes()]) + dst = dst[s.Fpsimd64.SizeBytes():] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (s *SignalContext64) UnmarshalBytes(src []byte) { + s.FaultAddr = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + for idx := 0; idx < 31; idx++ { + s.Regs[idx] = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + } + s.Sp = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Pc = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.Pstate = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + for idx := 0; idx < 8; idx++ { + s._pad[idx] = src[0] + src = src[1:] + } + s.Fpsimd64.UnmarshalBytes(src[:s.Fpsimd64.SizeBytes()]) + src = src[s.Fpsimd64.SizeBytes():] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (s *SignalContext64) Packed() bool { + return s.Fpsimd64.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (s *SignalContext64) MarshalUnsafe(dst []byte) { + if s.Fpsimd64.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(s), uintptr(s.SizeBytes())) + } else { + // Type SignalContext64 doesn't have a packed layout in memory, fallback to MarshalBytes. + s.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (s *SignalContext64) UnmarshalUnsafe(src []byte) { + if s.Fpsimd64.Packed() { + gohacks.Memmove(unsafe.Pointer(s), unsafe.Pointer(&src[0]), uintptr(s.SizeBytes())) + } else { + // Type SignalContext64 doesn't have a packed layout in memory, fallback to UnmarshalBytes. + s.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (s *SignalContext64) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !s.Fpsimd64.Packed() { + // Type SignalContext64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. + s.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (s *SignalContext64) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return s.CopyOutN(cc, addr, s.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (s *SignalContext64) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !s.Fpsimd64.Packed() { + // Type SignalContext64 doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + s.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (s *SignalContext64) WriteTo(writer io.Writer) (int64, error) { + if !s.Fpsimd64.Packed() { + // Type SignalContext64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, s.SizeBytes()) + s.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (u *UContext64) SizeBytes() int { + return 16 + + (*linux.SignalStack)(nil).SizeBytes() + + (*linux.SignalSet)(nil).SizeBytes() + + 1*120 + + 1*8 + + (*SignalContext64)(nil).SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (u *UContext64) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(u.Flags)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(u.Link)) + dst = dst[8:] + u.Stack.MarshalBytes(dst[:u.Stack.SizeBytes()]) + dst = dst[u.Stack.SizeBytes():] + u.Sigset.MarshalBytes(dst[:u.Sigset.SizeBytes()]) + dst = dst[u.Sigset.SizeBytes():] + for idx := 0; idx < 120; idx++ { + dst[0] = byte(u._pad[idx]) + dst = dst[1:] + } + for idx := 0; idx < 8; idx++ { + dst[0] = byte(u._pad2[idx]) + dst = dst[1:] + } + u.MContext.MarshalBytes(dst[:u.MContext.SizeBytes()]) + dst = dst[u.MContext.SizeBytes():] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (u *UContext64) UnmarshalBytes(src []byte) { + u.Flags = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + u.Link = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + u.Stack.UnmarshalBytes(src[:u.Stack.SizeBytes()]) + src = src[u.Stack.SizeBytes():] + u.Sigset.UnmarshalBytes(src[:u.Sigset.SizeBytes()]) + src = src[u.Sigset.SizeBytes():] + for idx := 0; idx < 120; idx++ { + u._pad[idx] = src[0] + src = src[1:] + } + for idx := 0; idx < 8; idx++ { + u._pad2[idx] = src[0] + src = src[1:] + } + u.MContext.UnmarshalBytes(src[:u.MContext.SizeBytes()]) + src = src[u.MContext.SizeBytes():] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (u *UContext64) Packed() bool { + return u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (u *UContext64) MarshalUnsafe(dst []byte) { + if u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(u), uintptr(u.SizeBytes())) + } else { + // Type UContext64 doesn't have a packed layout in memory, fallback to MarshalBytes. + u.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (u *UContext64) UnmarshalUnsafe(src []byte) { + if u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() { + gohacks.Memmove(unsafe.Pointer(u), unsafe.Pointer(&src[0]), uintptr(u.SizeBytes())) + } else { + // Type UContext64 doesn't have a packed layout in memory, fallback to UnmarshalBytes. + u.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (u *UContext64) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() { + // Type UContext64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(u.SizeBytes()) // escapes: okay. + u.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(u))) + hdr.Len = u.SizeBytes() + hdr.Cap = u.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that u + // must live until the use above. + runtime.KeepAlive(u) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (u *UContext64) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return u.CopyOutN(cc, addr, u.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (u *UContext64) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() { + // Type UContext64 doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(u.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + u.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(u))) + hdr.Len = u.SizeBytes() + hdr.Cap = u.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that u + // must live until the use above. + runtime.KeepAlive(u) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (u *UContext64) WriteTo(writer io.Writer) (int64, error) { + if !u.MContext.Packed() && u.Sigset.Packed() && u.Stack.Packed() { + // Type UContext64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, u.SizeBytes()) + u.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(u))) + hdr.Len = u.SizeBytes() + hdr.Cap = u.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that u + // must live until the use above. + runtime.KeepAlive(u) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (a *aarch64Ctx) SizeBytes() int { + return 8 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (a *aarch64Ctx) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(a.Magic)) + dst = dst[4:] + hostarch.ByteOrder.PutUint32(dst[:4], uint32(a.Size)) + dst = dst[4:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (a *aarch64Ctx) UnmarshalBytes(src []byte) { + a.Magic = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + a.Size = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (a *aarch64Ctx) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (a *aarch64Ctx) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(a), uintptr(a.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (a *aarch64Ctx) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(a), unsafe.Pointer(&src[0]), uintptr(a.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (a *aarch64Ctx) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(a))) + hdr.Len = a.SizeBytes() + hdr.Cap = a.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that a + // must live until the use above. + runtime.KeepAlive(a) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (a *aarch64Ctx) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return a.CopyOutN(cc, addr, a.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (a *aarch64Ctx) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(a))) + hdr.Len = a.SizeBytes() + hdr.Cap = a.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that a + // must live until the use above. + runtime.KeepAlive(a) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (a *aarch64Ctx) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(a))) + hdr.Len = a.SizeBytes() + hdr.Cap = a.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that a + // must live until the use above. + runtime.KeepAlive(a) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/sentry/arch/arch_arm64_state_autogen.go b/pkg/sentry/arch/arch_arm64_state_autogen.go new file mode 100644 index 000000000..45bfbfca1 --- /dev/null +++ b/pkg/sentry/arch/arch_arm64_state_autogen.go @@ -0,0 +1,42 @@ +// automatically generated by stateify. + +//go:build arm64 && arm64 && arm64 +// +build arm64,arm64,arm64 + +package arch + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (c *context64) StateTypeName() string { + return "pkg/sentry/arch.context64" +} + +func (c *context64) StateFields() []string { + return []string{ + "State", + "sigFPState", + } +} + +func (c *context64) beforeSave() {} + +// +checklocksignore +func (c *context64) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.State) + stateSinkObject.Save(1, &c.sigFPState) +} + +func (c *context64) afterLoad() {} + +// +checklocksignore +func (c *context64) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.State) + stateSourceObject.Load(1, &c.sigFPState) +} + +func init() { + state.Register((*context64)(nil)) +} diff --git a/pkg/sentry/arch/arch_state_autogen.go b/pkg/sentry/arch/arch_state_autogen.go new file mode 100644 index 000000000..8a151f95c --- /dev/null +++ b/pkg/sentry/arch/arch_state_autogen.go @@ -0,0 +1,80 @@ +// automatically generated by stateify. + +package arch + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (m *MmapLayout) StateTypeName() string { + return "pkg/sentry/arch.MmapLayout" +} + +func (m *MmapLayout) StateFields() []string { + return []string{ + "MinAddr", + "MaxAddr", + "BottomUpBase", + "TopDownBase", + "DefaultDirection", + "MaxStackRand", + } +} + +func (m *MmapLayout) beforeSave() {} + +// +checklocksignore +func (m *MmapLayout) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.MinAddr) + stateSinkObject.Save(1, &m.MaxAddr) + stateSinkObject.Save(2, &m.BottomUpBase) + stateSinkObject.Save(3, &m.TopDownBase) + stateSinkObject.Save(4, &m.DefaultDirection) + stateSinkObject.Save(5, &m.MaxStackRand) +} + +func (m *MmapLayout) afterLoad() {} + +// +checklocksignore +func (m *MmapLayout) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.MinAddr) + stateSourceObject.Load(1, &m.MaxAddr) + stateSourceObject.Load(2, &m.BottomUpBase) + stateSourceObject.Load(3, &m.TopDownBase) + stateSourceObject.Load(4, &m.DefaultDirection) + stateSourceObject.Load(5, &m.MaxStackRand) +} + +func (a *AuxEntry) StateTypeName() string { + return "pkg/sentry/arch.AuxEntry" +} + +func (a *AuxEntry) StateFields() []string { + return []string{ + "Key", + "Value", + } +} + +func (a *AuxEntry) beforeSave() {} + +// +checklocksignore +func (a *AuxEntry) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.Key) + stateSinkObject.Save(1, &a.Value) +} + +func (a *AuxEntry) afterLoad() {} + +// +checklocksignore +func (a *AuxEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.Key) + stateSourceObject.Load(1, &a.Value) +} + +func init() { + state.Register((*MmapLayout)(nil)) + state.Register((*AuxEntry)(nil)) +} diff --git a/pkg/sentry/arch/arch_unsafe_abi_autogen_unsafe.go b/pkg/sentry/arch/arch_unsafe_abi_autogen_unsafe.go new file mode 100644 index 000000000..2167ed9c9 --- /dev/null +++ b/pkg/sentry/arch/arch_unsafe_abi_autogen_unsafe.go @@ -0,0 +1,7 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package arch + +import ( +) + diff --git a/pkg/sentry/arch/arch_unsafe_state_autogen.go b/pkg/sentry/arch/arch_unsafe_state_autogen.go new file mode 100644 index 000000000..9a45071b9 --- /dev/null +++ b/pkg/sentry/arch/arch_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package arch diff --git a/pkg/sentry/arch/arch_x86_abi_autogen_unsafe.go b/pkg/sentry/arch/arch_x86_abi_autogen_unsafe.go new file mode 100644 index 000000000..97a463c9f --- /dev/null +++ b/pkg/sentry/arch/arch_x86_abi_autogen_unsafe.go @@ -0,0 +1,17 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build (amd64 || 386) && (amd64 || 386) +// +build amd64 386 +// +build amd64 386 + +package arch + +import ( +) + diff --git a/pkg/sentry/arch/arch_x86_impl_abi_autogen_unsafe.go b/pkg/sentry/arch/arch_x86_impl_abi_autogen_unsafe.go new file mode 100644 index 000000000..2b2c59d95 --- /dev/null +++ b/pkg/sentry/arch/arch_x86_impl_abi_autogen_unsafe.go @@ -0,0 +1,17 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build (amd64 || 386) && go1.1 +// +build amd64 386 +// +build go1.1 + +package arch + +import ( +) + diff --git a/pkg/sentry/arch/arch_x86_impl_state_autogen.go b/pkg/sentry/arch/arch_x86_impl_state_autogen.go new file mode 100644 index 000000000..edcc2bee5 --- /dev/null +++ b/pkg/sentry/arch/arch_x86_impl_state_autogen.go @@ -0,0 +1,45 @@ +// automatically generated by stateify. + +//go:build (amd64 || 386) && go1.1 +// +build amd64 386 +// +build go1.1 + +package arch + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *State) StateTypeName() string { + return "pkg/sentry/arch.State" +} + +func (s *State) StateFields() []string { + return []string{ + "Regs", + "fpState", + "FeatureSet", + } +} + +func (s *State) beforeSave() {} + +// +checklocksignore +func (s *State) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Regs) + stateSinkObject.Save(1, &s.fpState) + stateSinkObject.Save(2, &s.FeatureSet) +} + +// +checklocksignore +func (s *State) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Regs) + stateSourceObject.LoadWait(1, &s.fpState) + stateSourceObject.Load(2, &s.FeatureSet) + stateSourceObject.AfterLoad(s.afterLoad) +} + +func init() { + state.Register((*State)(nil)) +} diff --git a/pkg/sentry/arch/arch_x86_state_autogen.go b/pkg/sentry/arch/arch_x86_state_autogen.go new file mode 100644 index 000000000..7785fb645 --- /dev/null +++ b/pkg/sentry/arch/arch_x86_state_autogen.go @@ -0,0 +1,40 @@ +// automatically generated by stateify. + +//go:build (amd64 || 386) && (amd64 || 386) +// +build amd64 386 +// +build amd64 386 + +package arch + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *Registers) StateTypeName() string { + return "pkg/sentry/arch.Registers" +} + +func (r *Registers) StateFields() []string { + return []string{ + "PtraceRegs", + } +} + +func (r *Registers) beforeSave() {} + +// +checklocksignore +func (r *Registers) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.PtraceRegs) +} + +func (r *Registers) afterLoad() {} + +// +checklocksignore +func (r *Registers) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.PtraceRegs) +} + +func init() { + state.Register((*Registers)(nil)) +} diff --git a/pkg/sentry/arch/fpu/BUILD b/pkg/sentry/arch/fpu/BUILD deleted file mode 100644 index 6cdd21b1b..000000000 --- a/pkg/sentry/arch/fpu/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "fpu", - srcs = [ - "fpu.go", - "fpu_amd64.go", - "fpu_amd64.s", - "fpu_arm64.go", - ], - visibility = ["//:sandbox"], - deps = [ - "//pkg/cpuid", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sync", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/arch/fpu/fpu_amd64_state_autogen.go b/pkg/sentry/arch/fpu/fpu_amd64_state_autogen.go new file mode 100644 index 000000000..d31975f85 --- /dev/null +++ b/pkg/sentry/arch/fpu/fpu_amd64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build amd64 || i386 +// +build amd64 i386 + +package fpu diff --git a/pkg/sentry/arch/fpu/fpu_arm64_state_autogen.go b/pkg/sentry/arch/fpu/fpu_arm64_state_autogen.go new file mode 100644 index 000000000..7b28aba81 --- /dev/null +++ b/pkg/sentry/arch/fpu/fpu_arm64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 +// +build arm64 + +package fpu diff --git a/pkg/sentry/arch/fpu/fpu_state_autogen.go b/pkg/sentry/arch/fpu/fpu_state_autogen.go new file mode 100644 index 000000000..7cadc6b8f --- /dev/null +++ b/pkg/sentry/arch/fpu/fpu_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package fpu diff --git a/pkg/sentry/arch/registers.proto b/pkg/sentry/arch/registers.proto deleted file mode 100644 index 2727ba08a..000000000 --- a/pkg/sentry/arch/registers.proto +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -message AMD64Registers { - uint64 rax = 1; - uint64 rbx = 2; - uint64 rcx = 3; - uint64 rdx = 4; - uint64 rsi = 5; - uint64 rdi = 6; - uint64 rsp = 7; - uint64 rbp = 8; - - uint64 r8 = 9; - uint64 r9 = 10; - uint64 r10 = 11; - uint64 r11 = 12; - uint64 r12 = 13; - uint64 r13 = 14; - uint64 r14 = 15; - uint64 r15 = 16; - - uint64 rip = 17; - uint64 rflags = 18; - uint64 orig_rax = 19; - uint64 cs = 20; - uint64 ds = 21; - uint64 es = 22; - uint64 fs = 23; - uint64 gs = 24; - uint64 ss = 25; - uint64 fs_base = 26; - uint64 gs_base = 27; -} - -message ARM64Registers { - uint64 r0 = 1; - uint64 r1 = 2; - uint64 r2 = 3; - uint64 r3 = 4; - uint64 r4 = 5; - uint64 r5 = 6; - uint64 r6 = 7; - uint64 r7 = 8; - uint64 r8 = 9; - uint64 r9 = 10; - uint64 r10 = 11; - uint64 r11 = 12; - uint64 r12 = 13; - uint64 r13 = 14; - uint64 r14 = 15; - uint64 r15 = 16; - uint64 r16 = 17; - uint64 r17 = 18; - uint64 r18 = 19; - uint64 r19 = 20; - uint64 r20 = 21; - uint64 r21 = 22; - uint64 r22 = 23; - uint64 r23 = 24; - uint64 r24 = 25; - uint64 r25 = 26; - uint64 r26 = 27; - uint64 r27 = 28; - uint64 r28 = 29; - uint64 r29 = 30; - uint64 r30 = 31; - uint64 sp = 32; - uint64 pc = 33; - uint64 pstate = 34; - uint64 tls = 35; -} -message Registers { - oneof arch { - AMD64Registers amd64 = 1; - ARM64Registers arm64 = 2; - } -} diff --git a/pkg/sentry/arch/registers_go_proto/registers.pb.go b/pkg/sentry/arch/registers_go_proto/registers.pb.go new file mode 100644 index 000000000..0e73a10b8 --- /dev/null +++ b/pkg/sentry/arch/registers_go_proto/registers.pb.go @@ -0,0 +1,863 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.25.0 +// protoc v3.13.0 +// source: pkg/sentry/arch/registers.proto + +package gvisor + +import ( + proto "github.com/golang/protobuf/proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion that a sufficiently up-to-date version +// of the legacy proto package is being used. +const _ = proto.ProtoPackageIsVersion4 + +type AMD64Registers struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Rax uint64 `protobuf:"varint,1,opt,name=rax,proto3" json:"rax,omitempty"` + Rbx uint64 `protobuf:"varint,2,opt,name=rbx,proto3" json:"rbx,omitempty"` + Rcx uint64 `protobuf:"varint,3,opt,name=rcx,proto3" json:"rcx,omitempty"` + Rdx uint64 `protobuf:"varint,4,opt,name=rdx,proto3" json:"rdx,omitempty"` + Rsi uint64 `protobuf:"varint,5,opt,name=rsi,proto3" json:"rsi,omitempty"` + Rdi uint64 `protobuf:"varint,6,opt,name=rdi,proto3" json:"rdi,omitempty"` + Rsp uint64 `protobuf:"varint,7,opt,name=rsp,proto3" json:"rsp,omitempty"` + Rbp uint64 `protobuf:"varint,8,opt,name=rbp,proto3" json:"rbp,omitempty"` + R8 uint64 `protobuf:"varint,9,opt,name=r8,proto3" json:"r8,omitempty"` + R9 uint64 `protobuf:"varint,10,opt,name=r9,proto3" json:"r9,omitempty"` + R10 uint64 `protobuf:"varint,11,opt,name=r10,proto3" json:"r10,omitempty"` + R11 uint64 `protobuf:"varint,12,opt,name=r11,proto3" json:"r11,omitempty"` + R12 uint64 `protobuf:"varint,13,opt,name=r12,proto3" json:"r12,omitempty"` + R13 uint64 `protobuf:"varint,14,opt,name=r13,proto3" json:"r13,omitempty"` + R14 uint64 `protobuf:"varint,15,opt,name=r14,proto3" json:"r14,omitempty"` + R15 uint64 `protobuf:"varint,16,opt,name=r15,proto3" json:"r15,omitempty"` + Rip uint64 `protobuf:"varint,17,opt,name=rip,proto3" json:"rip,omitempty"` + Rflags uint64 `protobuf:"varint,18,opt,name=rflags,proto3" json:"rflags,omitempty"` + OrigRax uint64 `protobuf:"varint,19,opt,name=orig_rax,json=origRax,proto3" json:"orig_rax,omitempty"` + Cs uint64 `protobuf:"varint,20,opt,name=cs,proto3" json:"cs,omitempty"` + Ds uint64 `protobuf:"varint,21,opt,name=ds,proto3" json:"ds,omitempty"` + Es uint64 `protobuf:"varint,22,opt,name=es,proto3" json:"es,omitempty"` + Fs uint64 `protobuf:"varint,23,opt,name=fs,proto3" json:"fs,omitempty"` + Gs uint64 `protobuf:"varint,24,opt,name=gs,proto3" json:"gs,omitempty"` + Ss uint64 `protobuf:"varint,25,opt,name=ss,proto3" json:"ss,omitempty"` + FsBase uint64 `protobuf:"varint,26,opt,name=fs_base,json=fsBase,proto3" json:"fs_base,omitempty"` + GsBase uint64 `protobuf:"varint,27,opt,name=gs_base,json=gsBase,proto3" json:"gs_base,omitempty"` +} + +func (x *AMD64Registers) Reset() { + *x = AMD64Registers{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_arch_registers_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *AMD64Registers) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AMD64Registers) ProtoMessage() {} + +func (x *AMD64Registers) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_arch_registers_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AMD64Registers.ProtoReflect.Descriptor instead. +func (*AMD64Registers) Descriptor() ([]byte, []int) { + return file_pkg_sentry_arch_registers_proto_rawDescGZIP(), []int{0} +} + +func (x *AMD64Registers) GetRax() uint64 { + if x != nil { + return x.Rax + } + return 0 +} + +func (x *AMD64Registers) GetRbx() uint64 { + if x != nil { + return x.Rbx + } + return 0 +} + +func (x *AMD64Registers) GetRcx() uint64 { + if x != nil { + return x.Rcx + } + return 0 +} + +func (x *AMD64Registers) GetRdx() uint64 { + if x != nil { + return x.Rdx + } + return 0 +} + +func (x *AMD64Registers) GetRsi() uint64 { + if x != nil { + return x.Rsi + } + return 0 +} + +func (x *AMD64Registers) GetRdi() uint64 { + if x != nil { + return x.Rdi + } + return 0 +} + +func (x *AMD64Registers) GetRsp() uint64 { + if x != nil { + return x.Rsp + } + return 0 +} + +func (x *AMD64Registers) GetRbp() uint64 { + if x != nil { + return x.Rbp + } + return 0 +} + +func (x *AMD64Registers) GetR8() uint64 { + if x != nil { + return x.R8 + } + return 0 +} + +func (x *AMD64Registers) GetR9() uint64 { + if x != nil { + return x.R9 + } + return 0 +} + +func (x *AMD64Registers) GetR10() uint64 { + if x != nil { + return x.R10 + } + return 0 +} + +func (x *AMD64Registers) GetR11() uint64 { + if x != nil { + return x.R11 + } + return 0 +} + +func (x *AMD64Registers) GetR12() uint64 { + if x != nil { + return x.R12 + } + return 0 +} + +func (x *AMD64Registers) GetR13() uint64 { + if x != nil { + return x.R13 + } + return 0 +} + +func (x *AMD64Registers) GetR14() uint64 { + if x != nil { + return x.R14 + } + return 0 +} + +func (x *AMD64Registers) GetR15() uint64 { + if x != nil { + return x.R15 + } + return 0 +} + +func (x *AMD64Registers) GetRip() uint64 { + if x != nil { + return x.Rip + } + return 0 +} + +func (x *AMD64Registers) GetRflags() uint64 { + if x != nil { + return x.Rflags + } + return 0 +} + +func (x *AMD64Registers) GetOrigRax() uint64 { + if x != nil { + return x.OrigRax + } + return 0 +} + +func (x *AMD64Registers) GetCs() uint64 { + if x != nil { + return x.Cs + } + return 0 +} + +func (x *AMD64Registers) GetDs() uint64 { + if x != nil { + return x.Ds + } + return 0 +} + +func (x *AMD64Registers) GetEs() uint64 { + if x != nil { + return x.Es + } + return 0 +} + +func (x *AMD64Registers) GetFs() uint64 { + if x != nil { + return x.Fs + } + return 0 +} + +func (x *AMD64Registers) GetGs() uint64 { + if x != nil { + return x.Gs + } + return 0 +} + +func (x *AMD64Registers) GetSs() uint64 { + if x != nil { + return x.Ss + } + return 0 +} + +func (x *AMD64Registers) GetFsBase() uint64 { + if x != nil { + return x.FsBase + } + return 0 +} + +func (x *AMD64Registers) GetGsBase() uint64 { + if x != nil { + return x.GsBase + } + return 0 +} + +type ARM64Registers struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + R0 uint64 `protobuf:"varint,1,opt,name=r0,proto3" json:"r0,omitempty"` + R1 uint64 `protobuf:"varint,2,opt,name=r1,proto3" json:"r1,omitempty"` + R2 uint64 `protobuf:"varint,3,opt,name=r2,proto3" json:"r2,omitempty"` + R3 uint64 `protobuf:"varint,4,opt,name=r3,proto3" json:"r3,omitempty"` + R4 uint64 `protobuf:"varint,5,opt,name=r4,proto3" json:"r4,omitempty"` + R5 uint64 `protobuf:"varint,6,opt,name=r5,proto3" json:"r5,omitempty"` + R6 uint64 `protobuf:"varint,7,opt,name=r6,proto3" json:"r6,omitempty"` + R7 uint64 `protobuf:"varint,8,opt,name=r7,proto3" json:"r7,omitempty"` + R8 uint64 `protobuf:"varint,9,opt,name=r8,proto3" json:"r8,omitempty"` + R9 uint64 `protobuf:"varint,10,opt,name=r9,proto3" json:"r9,omitempty"` + R10 uint64 `protobuf:"varint,11,opt,name=r10,proto3" json:"r10,omitempty"` + R11 uint64 `protobuf:"varint,12,opt,name=r11,proto3" json:"r11,omitempty"` + R12 uint64 `protobuf:"varint,13,opt,name=r12,proto3" json:"r12,omitempty"` + R13 uint64 `protobuf:"varint,14,opt,name=r13,proto3" json:"r13,omitempty"` + R14 uint64 `protobuf:"varint,15,opt,name=r14,proto3" json:"r14,omitempty"` + R15 uint64 `protobuf:"varint,16,opt,name=r15,proto3" json:"r15,omitempty"` + R16 uint64 `protobuf:"varint,17,opt,name=r16,proto3" json:"r16,omitempty"` + R17 uint64 `protobuf:"varint,18,opt,name=r17,proto3" json:"r17,omitempty"` + R18 uint64 `protobuf:"varint,19,opt,name=r18,proto3" json:"r18,omitempty"` + R19 uint64 `protobuf:"varint,20,opt,name=r19,proto3" json:"r19,omitempty"` + R20 uint64 `protobuf:"varint,21,opt,name=r20,proto3" json:"r20,omitempty"` + R21 uint64 `protobuf:"varint,22,opt,name=r21,proto3" json:"r21,omitempty"` + R22 uint64 `protobuf:"varint,23,opt,name=r22,proto3" json:"r22,omitempty"` + R23 uint64 `protobuf:"varint,24,opt,name=r23,proto3" json:"r23,omitempty"` + R24 uint64 `protobuf:"varint,25,opt,name=r24,proto3" json:"r24,omitempty"` + R25 uint64 `protobuf:"varint,26,opt,name=r25,proto3" json:"r25,omitempty"` + R26 uint64 `protobuf:"varint,27,opt,name=r26,proto3" json:"r26,omitempty"` + R27 uint64 `protobuf:"varint,28,opt,name=r27,proto3" json:"r27,omitempty"` + R28 uint64 `protobuf:"varint,29,opt,name=r28,proto3" json:"r28,omitempty"` + R29 uint64 `protobuf:"varint,30,opt,name=r29,proto3" json:"r29,omitempty"` + R30 uint64 `protobuf:"varint,31,opt,name=r30,proto3" json:"r30,omitempty"` + Sp uint64 `protobuf:"varint,32,opt,name=sp,proto3" json:"sp,omitempty"` + Pc uint64 `protobuf:"varint,33,opt,name=pc,proto3" json:"pc,omitempty"` + Pstate uint64 `protobuf:"varint,34,opt,name=pstate,proto3" json:"pstate,omitempty"` + Tls uint64 `protobuf:"varint,35,opt,name=tls,proto3" json:"tls,omitempty"` +} + +func (x *ARM64Registers) Reset() { + *x = ARM64Registers{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_arch_registers_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ARM64Registers) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ARM64Registers) ProtoMessage() {} + +func (x *ARM64Registers) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_arch_registers_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ARM64Registers.ProtoReflect.Descriptor instead. +func (*ARM64Registers) Descriptor() ([]byte, []int) { + return file_pkg_sentry_arch_registers_proto_rawDescGZIP(), []int{1} +} + +func (x *ARM64Registers) GetR0() uint64 { + if x != nil { + return x.R0 + } + return 0 +} + +func (x *ARM64Registers) GetR1() uint64 { + if x != nil { + return x.R1 + } + return 0 +} + +func (x *ARM64Registers) GetR2() uint64 { + if x != nil { + return x.R2 + } + return 0 +} + +func (x *ARM64Registers) GetR3() uint64 { + if x != nil { + return x.R3 + } + return 0 +} + +func (x *ARM64Registers) GetR4() uint64 { + if x != nil { + return x.R4 + } + return 0 +} + +func (x *ARM64Registers) GetR5() uint64 { + if x != nil { + return x.R5 + } + return 0 +} + +func (x *ARM64Registers) GetR6() uint64 { + if x != nil { + return x.R6 + } + return 0 +} + +func (x *ARM64Registers) GetR7() uint64 { + if x != nil { + return x.R7 + } + return 0 +} + +func (x *ARM64Registers) GetR8() uint64 { + if x != nil { + return x.R8 + } + return 0 +} + +func (x *ARM64Registers) GetR9() uint64 { + if x != nil { + return x.R9 + } + return 0 +} + +func (x *ARM64Registers) GetR10() uint64 { + if x != nil { + return x.R10 + } + return 0 +} + +func (x *ARM64Registers) GetR11() uint64 { + if x != nil { + return x.R11 + } + return 0 +} + +func (x *ARM64Registers) GetR12() uint64 { + if x != nil { + return x.R12 + } + return 0 +} + +func (x *ARM64Registers) GetR13() uint64 { + if x != nil { + return x.R13 + } + return 0 +} + +func (x *ARM64Registers) GetR14() uint64 { + if x != nil { + return x.R14 + } + return 0 +} + +func (x *ARM64Registers) GetR15() uint64 { + if x != nil { + return x.R15 + } + return 0 +} + +func (x *ARM64Registers) GetR16() uint64 { + if x != nil { + return x.R16 + } + return 0 +} + +func (x *ARM64Registers) GetR17() uint64 { + if x != nil { + return x.R17 + } + return 0 +} + +func (x *ARM64Registers) GetR18() uint64 { + if x != nil { + return x.R18 + } + return 0 +} + +func (x *ARM64Registers) GetR19() uint64 { + if x != nil { + return x.R19 + } + return 0 +} + +func (x *ARM64Registers) GetR20() uint64 { + if x != nil { + return x.R20 + } + return 0 +} + +func (x *ARM64Registers) GetR21() uint64 { + if x != nil { + return x.R21 + } + return 0 +} + +func (x *ARM64Registers) GetR22() uint64 { + if x != nil { + return x.R22 + } + return 0 +} + +func (x *ARM64Registers) GetR23() uint64 { + if x != nil { + return x.R23 + } + return 0 +} + +func (x *ARM64Registers) GetR24() uint64 { + if x != nil { + return x.R24 + } + return 0 +} + +func (x *ARM64Registers) GetR25() uint64 { + if x != nil { + return x.R25 + } + return 0 +} + +func (x *ARM64Registers) GetR26() uint64 { + if x != nil { + return x.R26 + } + return 0 +} + +func (x *ARM64Registers) GetR27() uint64 { + if x != nil { + return x.R27 + } + return 0 +} + +func (x *ARM64Registers) GetR28() uint64 { + if x != nil { + return x.R28 + } + return 0 +} + +func (x *ARM64Registers) GetR29() uint64 { + if x != nil { + return x.R29 + } + return 0 +} + +func (x *ARM64Registers) GetR30() uint64 { + if x != nil { + return x.R30 + } + return 0 +} + +func (x *ARM64Registers) GetSp() uint64 { + if x != nil { + return x.Sp + } + return 0 +} + +func (x *ARM64Registers) GetPc() uint64 { + if x != nil { + return x.Pc + } + return 0 +} + +func (x *ARM64Registers) GetPstate() uint64 { + if x != nil { + return x.Pstate + } + return 0 +} + +func (x *ARM64Registers) GetTls() uint64 { + if x != nil { + return x.Tls + } + return 0 +} + +type Registers struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to Arch: + // *Registers_Amd64 + // *Registers_Arm64 + Arch isRegisters_Arch `protobuf_oneof:"arch"` +} + +func (x *Registers) Reset() { + *x = Registers{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_arch_registers_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Registers) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Registers) ProtoMessage() {} + +func (x *Registers) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_arch_registers_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Registers.ProtoReflect.Descriptor instead. +func (*Registers) Descriptor() ([]byte, []int) { + return file_pkg_sentry_arch_registers_proto_rawDescGZIP(), []int{2} +} + +func (m *Registers) GetArch() isRegisters_Arch { + if m != nil { + return m.Arch + } + return nil +} + +func (x *Registers) GetAmd64() *AMD64Registers { + if x, ok := x.GetArch().(*Registers_Amd64); ok { + return x.Amd64 + } + return nil +} + +func (x *Registers) GetArm64() *ARM64Registers { + if x, ok := x.GetArch().(*Registers_Arm64); ok { + return x.Arm64 + } + return nil +} + +type isRegisters_Arch interface { + isRegisters_Arch() +} + +type Registers_Amd64 struct { + Amd64 *AMD64Registers `protobuf:"bytes,1,opt,name=amd64,proto3,oneof"` +} + +type Registers_Arm64 struct { + Arm64 *ARM64Registers `protobuf:"bytes,2,opt,name=arm64,proto3,oneof"` +} + +func (*Registers_Amd64) isRegisters_Arch() {} + +func (*Registers_Arm64) isRegisters_Arch() {} + +var File_pkg_sentry_arch_registers_proto protoreflect.FileDescriptor + +var file_pkg_sentry_arch_registers_proto_rawDesc = []byte{ + 0x0a, 0x1f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x2f, 0x61, 0x72, 0x63, + 0x68, 0x2f, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x06, 0x67, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x22, 0x83, 0x04, 0x0a, 0x0e, 0x41, 0x4d, + 0x44, 0x36, 0x34, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x12, 0x10, 0x0a, 0x03, + 0x72, 0x61, 0x78, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x61, 0x78, 0x12, 0x10, + 0x0a, 0x03, 0x72, 0x62, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x62, 0x78, + 0x12, 0x10, 0x0a, 0x03, 0x72, 0x63, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, + 0x63, 0x78, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x64, 0x78, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x03, 0x72, 0x64, 0x78, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x73, 0x69, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x03, 0x72, 0x73, 0x69, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x64, 0x69, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x64, 0x69, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x73, 0x70, 0x18, + 0x07, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x73, 0x70, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x62, + 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x62, 0x70, 0x12, 0x0e, 0x0a, 0x02, + 0x72, 0x38, 0x18, 0x09, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x72, 0x38, 0x12, 0x0e, 0x0a, 0x02, + 0x72, 0x39, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x72, 0x39, 0x12, 0x10, 0x0a, 0x03, + 0x72, 0x31, 0x30, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x31, 0x30, 0x12, 0x10, + 0x0a, 0x03, 0x72, 0x31, 0x31, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x31, 0x31, + 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x32, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, + 0x31, 0x32, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x33, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x03, 0x72, 0x31, 0x33, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x34, 0x18, 0x0f, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x03, 0x72, 0x31, 0x34, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x35, 0x18, 0x10, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x31, 0x35, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x69, 0x70, 0x18, + 0x11, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x69, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x72, 0x66, + 0x6c, 0x61, 0x67, 0x73, 0x18, 0x12, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x72, 0x66, 0x6c, 0x61, + 0x67, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6f, 0x72, 0x69, 0x67, 0x5f, 0x72, 0x61, 0x78, 0x18, 0x13, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x6f, 0x72, 0x69, 0x67, 0x52, 0x61, 0x78, 0x12, 0x0e, 0x0a, + 0x02, 0x63, 0x73, 0x18, 0x14, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x63, 0x73, 0x12, 0x0e, 0x0a, + 0x02, 0x64, 0x73, 0x18, 0x15, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x64, 0x73, 0x12, 0x0e, 0x0a, + 0x02, 0x65, 0x73, 0x18, 0x16, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x65, 0x73, 0x12, 0x0e, 0x0a, + 0x02, 0x66, 0x73, 0x18, 0x17, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x66, 0x73, 0x12, 0x0e, 0x0a, + 0x02, 0x67, 0x73, 0x18, 0x18, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x67, 0x73, 0x12, 0x0e, 0x0a, + 0x02, 0x73, 0x73, 0x18, 0x19, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x73, 0x73, 0x12, 0x17, 0x0a, + 0x07, 0x66, 0x73, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, + 0x66, 0x73, 0x42, 0x61, 0x73, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x67, 0x73, 0x5f, 0x62, 0x61, 0x73, + 0x65, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x67, 0x73, 0x42, 0x61, 0x73, 0x65, 0x22, + 0xf4, 0x04, 0x0a, 0x0e, 0x41, 0x52, 0x4d, 0x36, 0x34, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, + 0x72, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x72, 0x30, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, + 0x72, 0x30, 0x12, 0x0e, 0x0a, 0x02, 0x72, 0x31, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, + 0x72, 0x31, 0x12, 0x0e, 0x0a, 0x02, 0x72, 0x32, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, + 0x72, 0x32, 0x12, 0x0e, 0x0a, 0x02, 0x72, 0x33, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, + 0x72, 0x33, 0x12, 0x0e, 0x0a, 0x02, 0x72, 0x34, 0x18, 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, + 0x72, 0x34, 0x12, 0x0e, 0x0a, 0x02, 0x72, 0x35, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, + 0x72, 0x35, 0x12, 0x0e, 0x0a, 0x02, 0x72, 0x36, 0x18, 0x07, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, + 0x72, 0x36, 0x12, 0x0e, 0x0a, 0x02, 0x72, 0x37, 0x18, 0x08, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, + 0x72, 0x37, 0x12, 0x0e, 0x0a, 0x02, 0x72, 0x38, 0x18, 0x09, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, + 0x72, 0x38, 0x12, 0x0e, 0x0a, 0x02, 0x72, 0x39, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, + 0x72, 0x39, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x30, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x03, 0x72, 0x31, 0x30, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x31, 0x18, 0x0c, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x03, 0x72, 0x31, 0x31, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x32, 0x18, 0x0d, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x31, 0x32, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x33, 0x18, + 0x0e, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x31, 0x33, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, + 0x34, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x31, 0x34, 0x12, 0x10, 0x0a, 0x03, + 0x72, 0x31, 0x35, 0x18, 0x10, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x31, 0x35, 0x12, 0x10, + 0x0a, 0x03, 0x72, 0x31, 0x36, 0x18, 0x11, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x31, 0x36, + 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x37, 0x18, 0x12, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, + 0x31, 0x37, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x38, 0x18, 0x13, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x03, 0x72, 0x31, 0x38, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x31, 0x39, 0x18, 0x14, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x03, 0x72, 0x31, 0x39, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x32, 0x30, 0x18, 0x15, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x32, 0x30, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x32, 0x31, 0x18, + 0x16, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x32, 0x31, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x32, + 0x32, 0x18, 0x17, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x32, 0x32, 0x12, 0x10, 0x0a, 0x03, + 0x72, 0x32, 0x33, 0x18, 0x18, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x32, 0x33, 0x12, 0x10, + 0x0a, 0x03, 0x72, 0x32, 0x34, 0x18, 0x19, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x32, 0x34, + 0x12, 0x10, 0x0a, 0x03, 0x72, 0x32, 0x35, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, + 0x32, 0x35, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x32, 0x36, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x03, 0x72, 0x32, 0x36, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x32, 0x37, 0x18, 0x1c, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x03, 0x72, 0x32, 0x37, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x32, 0x38, 0x18, 0x1d, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x32, 0x38, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x32, 0x39, 0x18, + 0x1e, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x32, 0x39, 0x12, 0x10, 0x0a, 0x03, 0x72, 0x33, + 0x30, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x72, 0x33, 0x30, 0x12, 0x0e, 0x0a, 0x02, + 0x73, 0x70, 0x18, 0x20, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x73, 0x70, 0x12, 0x0e, 0x0a, 0x02, + 0x70, 0x63, 0x18, 0x21, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x70, 0x63, 0x12, 0x16, 0x0a, 0x06, + 0x70, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x22, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x70, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x74, 0x6c, 0x73, 0x18, 0x23, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x03, 0x74, 0x6c, 0x73, 0x22, 0x73, 0x0a, 0x09, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, + 0x65, 0x72, 0x73, 0x12, 0x2e, 0x0a, 0x05, 0x61, 0x6d, 0x64, 0x36, 0x34, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x67, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x41, 0x4d, 0x44, 0x36, + 0x34, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x48, 0x00, 0x52, 0x05, 0x61, 0x6d, + 0x64, 0x36, 0x34, 0x12, 0x2e, 0x0a, 0x05, 0x61, 0x72, 0x6d, 0x36, 0x34, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x67, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x41, 0x52, 0x4d, 0x36, + 0x34, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x48, 0x00, 0x52, 0x05, 0x61, 0x72, + 0x6d, 0x36, 0x34, 0x42, 0x06, 0x0a, 0x04, 0x61, 0x72, 0x63, 0x68, 0x62, 0x06, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x33, +} + +var ( + file_pkg_sentry_arch_registers_proto_rawDescOnce sync.Once + file_pkg_sentry_arch_registers_proto_rawDescData = file_pkg_sentry_arch_registers_proto_rawDesc +) + +func file_pkg_sentry_arch_registers_proto_rawDescGZIP() []byte { + file_pkg_sentry_arch_registers_proto_rawDescOnce.Do(func() { + file_pkg_sentry_arch_registers_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_sentry_arch_registers_proto_rawDescData) + }) + return file_pkg_sentry_arch_registers_proto_rawDescData +} + +var file_pkg_sentry_arch_registers_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_pkg_sentry_arch_registers_proto_goTypes = []interface{}{ + (*AMD64Registers)(nil), // 0: gvisor.AMD64Registers + (*ARM64Registers)(nil), // 1: gvisor.ARM64Registers + (*Registers)(nil), // 2: gvisor.Registers +} +var file_pkg_sentry_arch_registers_proto_depIdxs = []int32{ + 0, // 0: gvisor.Registers.amd64:type_name -> gvisor.AMD64Registers + 1, // 1: gvisor.Registers.arm64:type_name -> gvisor.ARM64Registers + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { file_pkg_sentry_arch_registers_proto_init() } +func file_pkg_sentry_arch_registers_proto_init() { + if File_pkg_sentry_arch_registers_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_pkg_sentry_arch_registers_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*AMD64Registers); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_sentry_arch_registers_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ARM64Registers); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_sentry_arch_registers_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Registers); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_pkg_sentry_arch_registers_proto_msgTypes[2].OneofWrappers = []interface{}{ + (*Registers_Amd64)(nil), + (*Registers_Arm64)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pkg_sentry_arch_registers_proto_rawDesc, + NumEnums: 0, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_pkg_sentry_arch_registers_proto_goTypes, + DependencyIndexes: file_pkg_sentry_arch_registers_proto_depIdxs, + MessageInfos: file_pkg_sentry_arch_registers_proto_msgTypes, + }.Build() + File_pkg_sentry_arch_registers_proto = out.File + file_pkg_sentry_arch_registers_proto_rawDesc = nil + file_pkg_sentry_arch_registers_proto_goTypes = nil + file_pkg_sentry_arch_registers_proto_depIdxs = nil +} diff --git a/pkg/sentry/contexttest/BUILD b/pkg/sentry/contexttest/BUILD deleted file mode 100644 index 6f4c86684..000000000 --- a/pkg/sentry/contexttest/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "contexttest", - testonly = 1, - srcs = ["contexttest.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/memutil", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/platform/ptrace", - "//pkg/sentry/uniqueid", - ], -) diff --git a/pkg/sentry/contexttest/contexttest.go b/pkg/sentry/contexttest/contexttest.go deleted file mode 100644 index dfd195a23..000000000 --- a/pkg/sentry/contexttest/contexttest.go +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package contexttest builds a test context.Context. -package contexttest - -import ( - "os" - "sync/atomic" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/memutil" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" - "gvisor.dev/gvisor/pkg/sentry/uniqueid" -) - -// Context returns a Context that may be used in tests. Uses ptrace as the -// platform.Platform. -// -// Note that some filesystems may require a minimal kernel for testing, which -// this test context does not provide. For such tests, see kernel/contexttest. -func Context(tb testing.TB) context.Context { - const memfileName = "contexttest-memory" - memfd, err := memutil.CreateMemFD(memfileName, 0) - if err != nil { - tb.Fatalf("error creating application memory file: %v", err) - } - memfile := os.NewFile(uintptr(memfd), memfileName) - mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{}) - if err != nil { - memfile.Close() - tb.Fatalf("error creating pgalloc.MemoryFile: %v", err) - } - p, err := ptrace.New() - if err != nil { - tb.Fatal(err) - } - // Test usage of context.Background is fine. - return &TestContext{ - Context: context.Background(), - l: limits.NewLimitSet(), - mf: mf, - platform: p, - creds: auth.NewAnonymousCredentials(), - otherValues: make(map[interface{}]interface{}), - } -} - -// TestContext represents a context with minimal functionality suitable for -// running tests. -type TestContext struct { - context.Context - l *limits.LimitSet - mf *pgalloc.MemoryFile - platform platform.Platform - creds *auth.Credentials - otherValues map[interface{}]interface{} -} - -// globalUniqueID tracks incremental unique identifiers for tests. -var globalUniqueID uint64 - -// globalUniqueIDProvider implements unix.UniqueIDProvider. -type globalUniqueIDProvider struct{} - -// UniqueID implements unix.UniqueIDProvider.UniqueID. -func (*globalUniqueIDProvider) UniqueID() uint64 { - return atomic.AddUint64(&globalUniqueID, 1) -} - -// lastInotifyCookie is a monotonically increasing counter for generating unique -// inotify cookies. Must be accessed using atomic ops. -var lastInotifyCookie uint32 - -// hostClock implements ktime.Clock. -type hostClock struct { - ktime.WallRateClock - ktime.NoClockEvents -} - -// Now implements ktime.Clock.Now. -func (*hostClock) Now() ktime.Time { - return ktime.FromNanoseconds(time.Now().UnixNano()) -} - -// RegisterValue registers additional values with this test context. Useful for -// providing values from external packages that contexttest can't depend on. -func (t *TestContext) RegisterValue(key, value interface{}) { - t.otherValues[key] = value -} - -// Value implements context.Context. -func (t *TestContext) Value(key interface{}) interface{} { - switch key { - case auth.CtxCredentials: - return t.creds - case limits.CtxLimits: - return t.l - case pgalloc.CtxMemoryFile: - return t.mf - case pgalloc.CtxMemoryFileProvider: - return t - case platform.CtxPlatform: - return t.platform - case uniqueid.CtxGlobalUniqueID: - return (*globalUniqueIDProvider).UniqueID(nil) - case uniqueid.CtxGlobalUniqueIDProvider: - return &globalUniqueIDProvider{} - case uniqueid.CtxInotifyCookie: - return atomic.AddUint32(&lastInotifyCookie, 1) - case ktime.CtxRealtimeClock: - return &hostClock{} - default: - if val, ok := t.otherValues[key]; ok { - return val - } - return t.Context.Value(key) - } -} - -// MemoryFile implements pgalloc.MemoryFileProvider.MemoryFile. -func (t *TestContext) MemoryFile() *pgalloc.MemoryFile { - return t.mf -} - -// RootContext returns a Context that may be used in tests that need root -// credentials. Uses ptrace as the platform.Platform. -func RootContext(tb testing.TB) context.Context { - return auth.ContextWithCredentials(Context(tb), auth.NewRootCredentials(auth.NewRootUserNamespace())) -} - -// WithLimitSet returns a copy of ctx carrying l. -func WithLimitSet(ctx context.Context, l *limits.LimitSet) context.Context { - return limitContext{ctx, l} -} - -type limitContext struct { - context.Context - l *limits.LimitSet -} - -// Value implements context.Context. -func (lc limitContext) Value(key interface{}) interface{} { - switch key { - case limits.CtxLimits: - return lc.l - default: - return lc.Context.Value(key) - } -} diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD deleted file mode 100644 index deaf5fa23..000000000 --- a/pkg/sentry/control/BUILD +++ /dev/null @@ -1,51 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "control", - srcs = [ - "control.go", - "logging.go", - "pprof.go", - "proc.go", - "state.go", - ], - visibility = [ - "//:sandbox", - ], - deps = [ - "//pkg/abi/linux", - "//pkg/fd", - "//pkg/log", - "//pkg/sentry/fdimport", - "//pkg/sentry/fs", - "//pkg/sentry/fs/host", - "//pkg/sentry/fs/user", - "//pkg/sentry/fsimpl/host", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/state", - "//pkg/sentry/strace", - "//pkg/sentry/usage", - "//pkg/sentry/vfs", - "//pkg/sentry/watchdog", - "//pkg/sync", - "//pkg/tcpip/link/sniffer", - "//pkg/urpc", - ], -) - -go_test( - name = "control_test", - size = "small", - srcs = ["proc_test.go"], - library = ":control", - deps = [ - "//pkg/log", - "//pkg/sentry/kernel/time", - "//pkg/sentry/usage", - ], -) diff --git a/pkg/sentry/control/control_state_autogen.go b/pkg/sentry/control/control_state_autogen.go new file mode 100644 index 000000000..bd5797221 --- /dev/null +++ b/pkg/sentry/control/control_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package control diff --git a/pkg/sentry/control/proc_test.go b/pkg/sentry/control/proc_test.go deleted file mode 100644 index 0a88459b2..000000000 --- a/pkg/sentry/control/proc_test.go +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package control - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/log" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/usage" -) - -func init() { - log.SetLevel(log.Debug) -} - -// Tests that ProcessData.Table() prints with the correct format. -func TestProcessListTable(t *testing.T) { - testCases := []struct { - pl []*Process - expected string - }{ - { - pl: []*Process{}, - expected: "UID PID PPID C TTY STIME TIME CMD", - }, - { - pl: []*Process{ - { - UID: 0, - PID: 0, - PPID: 0, - C: 0, - TTY: "?", - STime: "0", - Time: "0", - Cmd: "zero", - }, - { - UID: 1, - PID: 1, - PPID: 1, - C: 1, - TTY: "pts/4", - STime: "1", - Time: "1", - Cmd: "one", - }, - }, - expected: `UID PID PPID C TTY STIME TIME CMD -0 0 0 0 ? 0 0 zero -1 1 1 1 pts/4 1 1 one`, - }, - } - - for _, tc := range testCases { - output := ProcessListToTable(tc.pl) - - if tc.expected != output { - t.Errorf("PrintTable(%v): got:\n%s\nwant:\n%s", tc.pl, output, tc.expected) - } - } -} - -func TestProcessListJSON(t *testing.T) { - testCases := []struct { - pl []*Process - expected string - }{ - { - pl: []*Process{}, - expected: "[]", - }, - { - pl: []*Process{ - { - UID: 0, - PID: 0, - PPID: 0, - C: 0, - STime: "0", - Time: "0", - Cmd: "zero", - }, - { - UID: 1, - PID: 1, - PPID: 1, - C: 1, - STime: "1", - Time: "1", - Cmd: "one", - }, - }, - expected: "[0,1]", - }, - } - - for _, tc := range testCases { - output, err := PrintPIDsJSON(tc.pl) - if err != nil { - t.Errorf("failed to generate JSON: %v", err) - } - - if tc.expected != output { - t.Errorf("PrintJSON(%v): got:\n%s\nwant:\n%s", tc.pl, output, tc.expected) - } - } -} - -func TestPercentCPU(t *testing.T) { - testCases := []struct { - stats usage.CPUStats - startTime ktime.Time - now ktime.Time - expected int32 - }{ - { - // Verify that 100% use is capped at 99. - stats: usage.CPUStats{UserTime: 1e9, SysTime: 1e9}, - startTime: ktime.FromNanoseconds(7e9), - now: ktime.FromNanoseconds(9e9), - expected: 99, - }, - { - // Verify that if usage > lifetime, we get at most 99% - // usage. - stats: usage.CPUStats{UserTime: 2e9, SysTime: 2e9}, - startTime: ktime.FromNanoseconds(7e9), - now: ktime.FromNanoseconds(9e9), - expected: 99, - }, - { - // Verify that 50% usage is reported correctly. - stats: usage.CPUStats{UserTime: 1e9, SysTime: 1e9}, - startTime: ktime.FromNanoseconds(12e9), - now: ktime.FromNanoseconds(16e9), - expected: 50, - }, - { - // Verify that 0% usage is reported correctly. - stats: usage.CPUStats{UserTime: 0, SysTime: 0}, - startTime: ktime.FromNanoseconds(12e9), - now: ktime.FromNanoseconds(14e9), - expected: 0, - }, - } - - for _, tc := range testCases { - if pcpu := percentCPU(tc.stats, tc.startTime, tc.now); pcpu != tc.expected { - t.Errorf("percentCPU(%v, %v, %v): got %d, want %d", tc.stats, tc.startTime, tc.now, pcpu, tc.expected) - } - } -} diff --git a/pkg/sentry/device/BUILD b/pkg/sentry/device/BUILD deleted file mode 100644 index e403cbd8b..000000000 --- a/pkg/sentry/device/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "device", - srcs = ["device.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sync", - ], -) - -go_test( - name = "device_test", - size = "small", - srcs = ["device_test.go"], - library = ":device", -) diff --git a/pkg/sentry/device/device_state_autogen.go b/pkg/sentry/device/device_state_autogen.go new file mode 100644 index 000000000..b68325291 --- /dev/null +++ b/pkg/sentry/device/device_state_autogen.go @@ -0,0 +1,97 @@ +// automatically generated by stateify. + +package device + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *Registry) StateTypeName() string { + return "pkg/sentry/device.Registry" +} + +func (r *Registry) StateFields() []string { + return []string{ + "lastAnonDeviceMinor", + "devices", + } +} + +func (r *Registry) beforeSave() {} + +// +checklocksignore +func (r *Registry) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.lastAnonDeviceMinor) + stateSinkObject.Save(1, &r.devices) +} + +func (r *Registry) afterLoad() {} + +// +checklocksignore +func (r *Registry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.lastAnonDeviceMinor) + stateSourceObject.Load(1, &r.devices) +} + +func (i *ID) StateTypeName() string { + return "pkg/sentry/device.ID" +} + +func (i *ID) StateFields() []string { + return []string{ + "Major", + "Minor", + } +} + +func (i *ID) beforeSave() {} + +// +checklocksignore +func (i *ID) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.Major) + stateSinkObject.Save(1, &i.Minor) +} + +func (i *ID) afterLoad() {} + +// +checklocksignore +func (i *ID) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.Major) + stateSourceObject.Load(1, &i.Minor) +} + +func (d *Device) StateTypeName() string { + return "pkg/sentry/device.Device" +} + +func (d *Device) StateFields() []string { + return []string{ + "ID", + "last", + } +} + +func (d *Device) beforeSave() {} + +// +checklocksignore +func (d *Device) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.ID) + stateSinkObject.Save(1, &d.last) +} + +func (d *Device) afterLoad() {} + +// +checklocksignore +func (d *Device) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.ID) + stateSourceObject.Load(1, &d.last) +} + +func init() { + state.Register((*Registry)(nil)) + state.Register((*ID)(nil)) + state.Register((*Device)(nil)) +} diff --git a/pkg/sentry/device/device_test.go b/pkg/sentry/device/device_test.go deleted file mode 100644 index e3f51ce4f..000000000 --- a/pkg/sentry/device/device_test.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package device - -import ( - "testing" -) - -func TestMultiDevice(t *testing.T) { - device := &MultiDevice{} - - // Check that Load fails to install virtual inodes that are - // uninitialized. - if device.Load(MultiDeviceKey{}, 0) { - t.Fatalf("got load of invalid virtual inode 0, want unsuccessful") - } - - inode := device.Map(MultiDeviceKey{}) - - // Assert that the same raw device and inode map to - // a consistent virtual inode. - if i := device.Map(MultiDeviceKey{}); i != inode { - t.Fatalf("got inode %d, want %d in %s", i, inode, device) - } - - // Assert that a new inode or new device does not conflict. - if i := device.Map(MultiDeviceKey{Device: 0, Inode: 1}); i == inode { - t.Fatalf("got reused inode %d, want new distinct inode in %s", i, device) - } - last := device.Map(MultiDeviceKey{Device: 1, Inode: 0}) - if last == inode { - t.Fatalf("got reused inode %d, want new distinct inode in %s", last, device) - } - - // Virtual is the virtual inode we want to load. - virtual := last + 1 - - // Assert that we can load a virtual inode at a new place. - if !device.Load(MultiDeviceKey{Device: 0, Inode: 2}, virtual) { - t.Fatalf("got load of virtual inode %d failed, want success in %s", virtual, device) - } - - // Assert that the next inode skips over the loaded one. - if i := device.Map(MultiDeviceKey{Device: 0, Inode: 3}); i != virtual+1 { - t.Fatalf("got inode %d, want %d in %s", i, virtual+1, device) - } -} diff --git a/pkg/sentry/devices/memdev/BUILD b/pkg/sentry/devices/memdev/BUILD deleted file mode 100644 index 4c8604d58..000000000 --- a/pkg/sentry/devices/memdev/BUILD +++ /dev/null @@ -1,29 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "memdev", - srcs = [ - "full.go", - "memdev.go", - "null.go", - "random.go", - "zero.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/rand", - "//pkg/safemem", - "//pkg/sentry/fsimpl/devtmpfs", - "//pkg/sentry/fsimpl/tmpfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/devices/memdev/memdev_state_autogen.go b/pkg/sentry/devices/memdev/memdev_state_autogen.go new file mode 100644 index 000000000..a8faafbbf --- /dev/null +++ b/pkg/sentry/devices/memdev/memdev_state_autogen.go @@ -0,0 +1,241 @@ +// automatically generated by stateify. + +package memdev + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (f *fullDevice) StateTypeName() string { + return "pkg/sentry/devices/memdev.fullDevice" +} + +func (f *fullDevice) StateFields() []string { + return []string{} +} + +func (f *fullDevice) beforeSave() {} + +// +checklocksignore +func (f *fullDevice) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *fullDevice) afterLoad() {} + +// +checklocksignore +func (f *fullDevice) StateLoad(stateSourceObject state.Source) { +} + +func (fd *fullFD) StateTypeName() string { + return "pkg/sentry/devices/memdev.fullFD" +} + +func (fd *fullFD) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + } +} + +func (fd *fullFD) beforeSave() {} + +// +checklocksignore +func (fd *fullFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &fd.NoLockFD) +} + +func (fd *fullFD) afterLoad() {} + +// +checklocksignore +func (fd *fullFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &fd.NoLockFD) +} + +func (n *nullDevice) StateTypeName() string { + return "pkg/sentry/devices/memdev.nullDevice" +} + +func (n *nullDevice) StateFields() []string { + return []string{} +} + +func (n *nullDevice) beforeSave() {} + +// +checklocksignore +func (n *nullDevice) StateSave(stateSinkObject state.Sink) { + n.beforeSave() +} + +func (n *nullDevice) afterLoad() {} + +// +checklocksignore +func (n *nullDevice) StateLoad(stateSourceObject state.Source) { +} + +func (fd *nullFD) StateTypeName() string { + return "pkg/sentry/devices/memdev.nullFD" +} + +func (fd *nullFD) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + } +} + +func (fd *nullFD) beforeSave() {} + +// +checklocksignore +func (fd *nullFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &fd.NoLockFD) +} + +func (fd *nullFD) afterLoad() {} + +// +checklocksignore +func (fd *nullFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &fd.NoLockFD) +} + +func (r *randomDevice) StateTypeName() string { + return "pkg/sentry/devices/memdev.randomDevice" +} + +func (r *randomDevice) StateFields() []string { + return []string{} +} + +func (r *randomDevice) beforeSave() {} + +// +checklocksignore +func (r *randomDevice) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *randomDevice) afterLoad() {} + +// +checklocksignore +func (r *randomDevice) StateLoad(stateSourceObject state.Source) { +} + +func (fd *randomFD) StateTypeName() string { + return "pkg/sentry/devices/memdev.randomFD" +} + +func (fd *randomFD) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + "off", + } +} + +func (fd *randomFD) beforeSave() {} + +// +checklocksignore +func (fd *randomFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &fd.NoLockFD) + stateSinkObject.Save(4, &fd.off) +} + +func (fd *randomFD) afterLoad() {} + +// +checklocksignore +func (fd *randomFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &fd.NoLockFD) + stateSourceObject.Load(4, &fd.off) +} + +func (z *zeroDevice) StateTypeName() string { + return "pkg/sentry/devices/memdev.zeroDevice" +} + +func (z *zeroDevice) StateFields() []string { + return []string{} +} + +func (z *zeroDevice) beforeSave() {} + +// +checklocksignore +func (z *zeroDevice) StateSave(stateSinkObject state.Sink) { + z.beforeSave() +} + +func (z *zeroDevice) afterLoad() {} + +// +checklocksignore +func (z *zeroDevice) StateLoad(stateSourceObject state.Source) { +} + +func (fd *zeroFD) StateTypeName() string { + return "pkg/sentry/devices/memdev.zeroFD" +} + +func (fd *zeroFD) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + } +} + +func (fd *zeroFD) beforeSave() {} + +// +checklocksignore +func (fd *zeroFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &fd.NoLockFD) +} + +func (fd *zeroFD) afterLoad() {} + +// +checklocksignore +func (fd *zeroFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &fd.NoLockFD) +} + +func init() { + state.Register((*fullDevice)(nil)) + state.Register((*fullFD)(nil)) + state.Register((*nullDevice)(nil)) + state.Register((*nullFD)(nil)) + state.Register((*randomDevice)(nil)) + state.Register((*randomFD)(nil)) + state.Register((*zeroDevice)(nil)) + state.Register((*zeroFD)(nil)) +} diff --git a/pkg/sentry/devices/quotedev/BUILD b/pkg/sentry/devices/quotedev/BUILD deleted file mode 100644 index d09214e3e..000000000 --- a/pkg/sentry/devices/quotedev/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "quotedev", - srcs = ["quotedev.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/fsimpl/devtmpfs", - "//pkg/sentry/vfs", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/devices/quotedev/quotedev.go b/pkg/sentry/devices/quotedev/quotedev.go deleted file mode 100644 index 6114cb724..000000000 --- a/pkg/sentry/devices/quotedev/quotedev.go +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2021 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package quotedev implements a vfs.Device for /dev/gvisor_quote. -package quotedev - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -const ( - quoteDevMinor = 0 -) - -// quoteDevice implements vfs.Device for /dev/gvisor_quote -// -// +stateify savable -type quoteDevice struct{} - -// Open implements vfs.Device.Open. -// TODO(b/157161182): Add support for attestation ioctls. -func (quoteDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - return nil, syserror.EIO -} - -// Register registers all devices implemented by this package in vfsObj. -func Register(vfsObj *vfs.VirtualFilesystem) error { - return vfsObj.RegisterDevice(vfs.CharDevice, linux.UNNAMED_MAJOR, quoteDevMinor, quoteDevice{}, &vfs.RegisterDeviceOptions{ - GroupName: "gvisor_quote", - }) -} - -// CreateDevtmpfsFiles creates device special files in dev representing all -// devices implemented by this package. -func CreateDevtmpfsFiles(ctx context.Context, dev *devtmpfs.Accessor) error { - return dev.CreateDeviceFile(ctx, "gvisor_quote", vfs.CharDevice, linux.UNNAMED_MAJOR, quoteDevMinor, 0666 /* mode */) -} diff --git a/pkg/sentry/devices/ttydev/BUILD b/pkg/sentry/devices/ttydev/BUILD deleted file mode 100644 index b4b6ca38a..000000000 --- a/pkg/sentry/devices/ttydev/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "ttydev", - srcs = ["ttydev.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/fsimpl/devtmpfs", - "//pkg/sentry/vfs", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/devices/ttydev/ttydev_state_autogen.go b/pkg/sentry/devices/ttydev/ttydev_state_autogen.go new file mode 100644 index 000000000..88a1cd0a6 --- /dev/null +++ b/pkg/sentry/devices/ttydev/ttydev_state_autogen.go @@ -0,0 +1,32 @@ +// automatically generated by stateify. + +package ttydev + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (t *ttyDevice) StateTypeName() string { + return "pkg/sentry/devices/ttydev.ttyDevice" +} + +func (t *ttyDevice) StateFields() []string { + return []string{} +} + +func (t *ttyDevice) beforeSave() {} + +// +checklocksignore +func (t *ttyDevice) StateSave(stateSinkObject state.Sink) { + t.beforeSave() +} + +func (t *ttyDevice) afterLoad() {} + +// +checklocksignore +func (t *ttyDevice) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*ttyDevice)(nil)) +} diff --git a/pkg/sentry/devices/tundev/BUILD b/pkg/sentry/devices/tundev/BUILD deleted file mode 100644 index 60c971030..000000000 --- a/pkg/sentry/devices/tundev/BUILD +++ /dev/null @@ -1,24 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "tundev", - srcs = ["tundev.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sentry/arch", - "//pkg/sentry/fsimpl/devtmpfs", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/socket/netstack", - "//pkg/sentry/vfs", - "//pkg/tcpip/link/tun", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/devices/tundev/tundev_state_autogen.go b/pkg/sentry/devices/tundev/tundev_state_autogen.go new file mode 100644 index 000000000..ad7c7feb4 --- /dev/null +++ b/pkg/sentry/devices/tundev/tundev_state_autogen.go @@ -0,0 +1,70 @@ +// automatically generated by stateify. + +package tundev + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (t *tunDevice) StateTypeName() string { + return "pkg/sentry/devices/tundev.tunDevice" +} + +func (t *tunDevice) StateFields() []string { + return []string{} +} + +func (t *tunDevice) beforeSave() {} + +// +checklocksignore +func (t *tunDevice) StateSave(stateSinkObject state.Sink) { + t.beforeSave() +} + +func (t *tunDevice) afterLoad() {} + +// +checklocksignore +func (t *tunDevice) StateLoad(stateSourceObject state.Source) { +} + +func (fd *tunFD) StateTypeName() string { + return "pkg/sentry/devices/tundev.tunFD" +} + +func (fd *tunFD) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + "device", + } +} + +func (fd *tunFD) beforeSave() {} + +// +checklocksignore +func (fd *tunFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &fd.NoLockFD) + stateSinkObject.Save(4, &fd.device) +} + +func (fd *tunFD) afterLoad() {} + +// +checklocksignore +func (fd *tunFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &fd.NoLockFD) + stateSourceObject.Load(4, &fd.device) +} + +func init() { + state.Register((*tunDevice)(nil)) + state.Register((*tunFD)(nil)) +} diff --git a/pkg/sentry/fdimport/BUILD b/pkg/sentry/fdimport/BUILD deleted file mode 100644 index 563e96e0d..000000000 --- a/pkg/sentry/fdimport/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "fdimport", - srcs = [ - "fdimport.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/fd", - "//pkg/sentry/fs", - "//pkg/sentry/fs/host", - "//pkg/sentry/fsimpl/host", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - ], -) diff --git a/pkg/sentry/fdimport/fdimport_state_autogen.go b/pkg/sentry/fdimport/fdimport_state_autogen.go new file mode 100644 index 000000000..fbb89b276 --- /dev/null +++ b/pkg/sentry/fdimport/fdimport_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package fdimport diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD deleted file mode 100644 index 58fe1e77c..000000000 --- a/pkg/sentry/fs/BUILD +++ /dev/null @@ -1,139 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_library( - name = "fs", - srcs = [ - "attr.go", - "context.go", - "copy_up.go", - "dentry.go", - "dirent.go", - "dirent_cache.go", - "dirent_cache_limiter.go", - "dirent_list.go", - "dirent_state.go", - "event_list.go", - "file.go", - "file_operations.go", - "file_overlay.go", - "file_state.go", - "filesystems.go", - "flags.go", - "fs.go", - "inode.go", - "inode_inotify.go", - "inode_operations.go", - "inode_overlay.go", - "inotify.go", - "inotify_event.go", - "inotify_watch.go", - "mock.go", - "mount.go", - "mount_overlay.go", - "mounts.go", - "offset.go", - "overlay.go", - "path.go", - "restore.go", - "save.go", - "seek.go", - "splice.go", - "sync.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/amutex", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/p9", - "//pkg/refs", - "//pkg/secio", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fsmetric", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/memmap", - "//pkg/sentry/platform", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/uniqueid", - "//pkg/sentry/usage", - "//pkg/state", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_template_instance( - name = "dirent_list", - out = "dirent_list.go", - package = "fs", - prefix = "dirent", - template = "//pkg/ilist:generic_list", - types = { - "Linker": "*Dirent", - "Element": "*Dirent", - }, -) - -go_template_instance( - name = "event_list", - out = "event_list.go", - package = "fs", - prefix = "event", - template = "//pkg/ilist:generic_list", - types = { - "Linker": "*Event", - "Element": "*Event", - }, -) - -go_test( - name = "fs_x_test", - size = "small", - srcs = [ - "copy_up_test.go", - "file_overlay_test.go", - "inode_overlay_test.go", - "mounts_test.go", - ], - deps = [ - ":fs", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel/contexttest", - "//pkg/sync", - "//pkg/usermem", - ], -) - -go_test( - name = "fs_test", - size = "small", - srcs = [ - "dirent_cache_test.go", - "dirent_refs_test.go", - "mount_test.go", - "path_test.go", - ], - library = ":fs", - deps = [ - "//pkg/context", - "//pkg/sentry/contexttest", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/fs/README.md b/pkg/sentry/fs/README.md deleted file mode 100644 index db4a1b730..000000000 --- a/pkg/sentry/fs/README.md +++ /dev/null @@ -1,229 +0,0 @@ -This package provides an implementation of the Linux virtual filesystem. - -[TOC] - -## Overview - -- An `fs.Dirent` caches an `fs.Inode` in memory at a path in the VFS, giving - the `fs.Inode` a relative position with respect to other `fs.Inode`s. - -- If an `fs.Dirent` is referenced by two file descriptors, then those file - descriptors are coherent with each other: they depend on the same - `fs.Inode`. - -- A mount point is an `fs.Dirent` for which `fs.Dirent.mounted` is true. It - exposes the root of a mounted filesystem. - -- The `fs.Inode` produced by a registered filesystem on mount(2) owns an - `fs.MountedFilesystem` from which other `fs.Inode`s will be looked up. For a - remote filesystem, the `fs.MountedFilesystem` owns the connection to that - remote filesystem. - -- In general: - -``` -fs.Inode <------------------------------ -| | -| | -produced by | -exactly one | -| responsible for the -| virtual identity of -v | -fs.MountedFilesystem ------------------- -``` - -Glossary: - -- VFS: virtual filesystem. - -- inode: a virtual file object holding a cached view of a file on a backing - filesystem (includes metadata and page caches). - -- superblock: the virtual state of a mounted filesystem (e.g. the virtual - inode number set). - -- mount namespace: a view of the mounts under a root (during path traversal, - the VFS makes visible/follows the mount point that is in the current task's - mount namespace). - -## Save and restore - -An application's hard dependencies on filesystem state can be broken down into -two categories: - -- The state necessary to execute a traversal on or view the *virtual* - filesystem hierarchy, regardless of what files an application has open. - -- The state necessary to represent open files. - -The first is always necessary to save and restore. An application may never have -any open file descriptors, but across save and restore it should see a coherent -view of any mount namespace. NOTE(b/63601033): Currently only one "initial" -mount namespace is supported. - -The second is so that system calls across save and restore are coherent with -each other (e.g. so that unintended re-reads or overwrites do not occur). - -Specifically this state is: - -- An `fs.MountManager` containing mount points. - -- A `kernel.FDTable` containing pointers to open files. - -Anything else managed by the VFS that can be easily loaded into memory from a -filesystem is synced back to those filesystems and is not saved. Examples are -pages in page caches used for optimizations (i.e. readahead and writeback), and -directory entries used to accelerate path lookups. - -### Mount points - -Saving and restoring a mount point means saving and restoring: - -- The root of the mounted filesystem. - -- Mount flags, which control how the VFS interacts with the mounted - filesystem. - -- Any relevant metadata about the mounted filesystem. - -- All `fs.Inode`s referenced by the application that reside under the mount - point. - -`fs.MountedFilesystem` is metadata about a filesystem that is mounted. It is -referenced by every `fs.Inode` loaded into memory under the mount point -including the `fs.Inode` of the mount point itself. The `fs.MountedFilesystem` -maps file objects on the filesystem to a virtualized `fs.Inode` number and vice -versa. - -To restore all `fs.Inode`s under a given mount point, each `fs.Inode` leverages -its dependency on an `fs.MountedFilesystem`. Since the `fs.MountedFilesystem` -knows how an `fs.Inode` maps to a file object on a backing filesystem, this -mapping can be trivially consulted by each `fs.Inode` when the `fs.Inode` is -restored. - -In detail, a mount point is saved in two steps: - -- First, after the kernel is paused but before state.Save, we walk all mount - namespaces and install a mapping from `fs.Inode` numbers to file paths - relative to the root of the mounted filesystem in each - `fs.MountedFilesystem`. This is subsequently called the set of `fs.Inode` - mappings. - -- Second, during state.Save, each `fs.MountedFilesystem` decides whether to - save the set of `fs.Inode` mappings. In-memory filesystems, like tmpfs, have - no need to save a set of `fs.Inode` mappings, since the `fs.Inode`s can be - entirely encoded in state file. Each `fs.MountedFilesystem` also optionally - saves the device name from when the filesystem was originally mounted. Each - `fs.Inode` saves its virtual identifier and a reference to a - `fs.MountedFilesystem`. - -A mount point is restored in two steps: - -- First, before state.Load, all mount configurations are stored in a global - `fs.RestoreEnvironment`. This tells us what mount points the user wants to - restore and how to re-establish pointers to backing filesystems. - -- Second, during state.Load, each `fs.MountedFilesystem` optionally searches - for a mount in the `fs.RestoreEnvironment` that matches its saved device - name. The `fs.MountedFilesystem` then reestablishes a pointer to the root of - the mounted filesystem. For example, the mount specification provides the - network connection for a mounted remote filesystem client to communicate - with its remote file server. The `fs.MountedFilesystem` also trivially loads - its set of `fs.Inode` mappings. When an `fs.Inode` is encountered, the - `fs.Inode` loads its virtual identifier and its reference a - `fs.MountedFilesystem`. It uses the `fs.MountedFilesystem` to obtain the - root of the mounted filesystem and the `fs.Inode` mappings to obtain the - relative file path to its data. With these, the `fs.Inode` re-establishes a - pointer to its file object. - -A mount point can trivially restore its `fs.Inode`s in parallel since -`fs.Inode`s have a restore dependency on their `fs.MountedFilesystem` and not on -each other. - -### Open files - -An `fs.File` references the following filesystem objects: - -```go -fs.File -> fs.Dirent -> fs.Inode -> fs.MountedFilesystem -``` - -The `fs.Inode` is restored using its `fs.MountedFilesystem`. The -[Mount points](#mount-points) section above describes how this happens in -detail. The `fs.Dirent` restores its pointer to an `fs.Inode`, pointers to -parent and children `fs.Dirents`, and the basename of the file. - -Otherwise an `fs.File` restores flags, an offset, and a unique identifier (only -used internally). - -It may use the `fs.Inode`, which it indirectly holds a reference on through the -`fs.Dirent`, to reestablish an open file handle on the backing filesystem (e.g. -to continue reading and writing). - -## Overlay - -The overlay implementation in the fs package takes Linux overlayfs as a frame of -reference but corrects for several POSIX consistency errors. - -In Linux overlayfs, the `struct inode` used for reading and writing to the same -file may be different. This is because the `struct inode` is dissociated with -the process of copying up the file from the upper to the lower directory. Since -flock(2) and fcntl(2) locks, inotify(7) watches, page caches, and a file's -identity are all stored directly or indirectly off the `struct inode`, these -properties of the `struct inode` may be stale after the first modification. This -can lead to file locking bugs, missed inotify events, and inconsistent data in -shared memory mappings of files, to name a few problems. - -The fs package maintains a single `fs.Inode` to represent a directory entry in -an overlay and defines operations on this `fs.Inode` which synchronize with the -copy up process. This achieves several things: - -+ File locks, inotify watches, and the identity of the file need not be copied - at all. - -+ Memory mappings of files coordinate with the copy up process so that if a - file in the lower directory is memory mapped, all references to it are - invalidated, forcing the application to re-fault on memory mappings of the - file under the upper directory. - -The `fs.Inode` holds metadata about files in the upper and/or lower directories -via an `fs.overlayEntry`. The `fs.overlayEntry` implements the `fs.Mappable` -interface. It multiplexes between upper and lower directory memory mappings and -stores a copy of memory references so they can be transferred to the upper -directory `fs.Mappable` when the file is copied up. - -The lower filesystem in an overlay may contain another (nested) overlay, but the -upper filesystem may not contain another overlay. In other words, nested -overlays form a tree structure that only allows branching in the lower -filesystem. - -Caching decisions in the overlay are delegated to the upper filesystem, meaning -that the Keep and Revalidate methods on the overlay return the same values as -the upper filesystem. A small wrinkle is that the lower filesystem is not -allowed to return `true` from Revalidate, as the overlay can not reload inodes -from the lower filesystem. A lower filesystem that does return `true` from -Revalidate will trigger a panic. - -The `fs.Inode` also holds a reference to a `fs.MountedFilesystem` that -normalizes across the mounted filesystem state of the upper and lower -directories. - -When a file is copied from the lower to the upper directory, attempts to -interact with the file block until the copy completes. All copying synchronizes -with rename(2). - -## Future Work - -### Overlay - -When a file is copied from a lower directory to an upper directory, several -locks are taken: the global renamuMu and the copyMu of the `fs.Inode` being -copied. This blocks operations on the file, including fault handling of memory -mappings. Performance could be improved by copying files into a temporary -directory that resides on the same filesystem as the upper directory and doing -an atomic rename, holding locks only during the rename operation. - -Additionally files are copied up synchronously. For large files, this causes a -noticeable latency. Performance could be improved by pipelining copies at -non-overlapping file offsets. diff --git a/pkg/sentry/fs/anon/BUILD b/pkg/sentry/fs/anon/BUILD deleted file mode 100644 index 1ce56d79f..000000000 --- a/pkg/sentry/fs/anon/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "anon", - srcs = [ - "anon.go", - "device.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/hostarch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - ], -) diff --git a/pkg/sentry/fs/anon/anon_state_autogen.go b/pkg/sentry/fs/anon/anon_state_autogen.go new file mode 100644 index 000000000..b2b1a466e --- /dev/null +++ b/pkg/sentry/fs/anon/anon_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package anon diff --git a/pkg/sentry/fs/copy_up_test.go b/pkg/sentry/fs/copy_up_test.go deleted file mode 100644 index e04784db2..000000000 --- a/pkg/sentry/fs/copy_up_test.go +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "bytes" - "crypto/rand" - "fmt" - "io" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/fs" - _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" -) - -const ( - // origFileSize is the original file size. This many bytes should be - // copied up before the test file is modified. - origFileSize = 4096 - - // truncatedFileSize is the size to truncate all test files. - truncateFileSize = 10 -) - -// TestConcurrentCopyUp is a copy up stress test for an overlay. -// -// It creates a 64-level deep directory tree in the lower filesystem and -// populates the last subdirectory with 64 files containing random content: -// -// /lower -// /sudir0/.../subdir63/ -// /file0 -// ... -// /file63 -// -// The files are truncated concurrently by 4 goroutines per file. -// These goroutines contend with copying up all parent 64 subdirectories -// as well as the final file content. -// -// At the end of the test, we assert that the files respect the new truncated -// size and contain the content we expect. -func TestConcurrentCopyUp(t *testing.T) { - ctx := contexttest.Context(t) - files := makeOverlayTestFiles(t) - - var wg sync.WaitGroup - for _, file := range files { - for i := 0; i < 4; i++ { - wg.Add(1) - go func(o *overlayTestFile) { - if err := o.File.Dirent.Inode.Truncate(ctx, o.File.Dirent, truncateFileSize); err != nil { - t.Errorf("failed to copy up: %v", err) - } - wg.Done() - }(file) - } - } - wg.Wait() - - for _, file := range files { - got := make([]byte, origFileSize) - n, err := file.File.Readv(ctx, usermem.BytesIOSequence(got)) - if int(n) != truncateFileSize { - t.Fatalf("read %d bytes from file, want %d", n, truncateFileSize) - } - if err != nil && err != io.EOF { - t.Fatalf("read got error %v, want nil", err) - } - if !bytes.Equal(got[:n], file.content[:truncateFileSize]) { - t.Fatalf("file content is %v, want %v", got[:n], file.content[:truncateFileSize]) - } - } -} - -type overlayTestFile struct { - File *fs.File - name string - content []byte -} - -func makeOverlayTestFiles(t *testing.T) []*overlayTestFile { - ctx := contexttest.Context(t) - - // Create a lower tmpfs mount. - fsys, _ := fs.FindFilesystem("tmpfs") - lower, err := fsys.Mount(contexttest.Context(t), "", fs.MountSourceFlags{}, "", nil) - if err != nil { - t.Fatalf("failed to mount tmpfs: %v", err) - } - lowerRoot := fs.NewDirent(ctx, lower, "") - - // Make a deep set of subdirectories that everyone shares. - next := lowerRoot - for i := 0; i < 64; i++ { - name := fmt.Sprintf("subdir%d", i) - err := next.CreateDirectory(ctx, lowerRoot, name, fs.FilePermsFromMode(0777)) - if err != nil { - t.Fatalf("failed to create dir %q: %v", name, err) - } - next, err = next.Walk(ctx, lowerRoot, name) - if err != nil { - t.Fatalf("failed to walk to %q: %v", name, err) - } - } - - // Make a bunch of files in the last directory. - var files []*overlayTestFile - for i := 0; i < 64; i++ { - name := fmt.Sprintf("file%d", i) - f, err := next.Create(ctx, next, name, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - t.Fatalf("failed to create file %q: %v", name, err) - } - defer f.DecRef(ctx) - - relname, _ := f.Dirent.FullName(lowerRoot) - - o := &overlayTestFile{ - name: relname, - content: make([]byte, origFileSize), - } - - if _, err := rand.Read(o.content); err != nil { - t.Fatalf("failed to read from /dev/urandom: %v", err) - } - - if _, err := f.Writev(ctx, usermem.BytesIOSequence(o.content)); err != nil { - t.Fatalf("failed to write content to file %q: %v", name, err) - } - - files = append(files, o) - } - - // Create an empty upper tmpfs mount which we will copy up into. - upper, err := fsys.Mount(ctx, "", fs.MountSourceFlags{}, "", nil) - if err != nil { - t.Fatalf("failed to mount tmpfs: %v", err) - } - - // Construct an overlay root. - overlay, err := fs.NewOverlayRoot(ctx, upper, lower, fs.MountSourceFlags{}) - if err != nil { - t.Fatalf("failed to construct overlay root: %v", err) - } - - // Create a MountNamespace to traverse the file system. - mns, err := fs.NewMountNamespace(ctx, overlay) - if err != nil { - t.Fatalf("failed to construct mount manager: %v", err) - } - - // Walk to all of the files in the overlay, open them readable. - for _, f := range files { - maxTraversals := uint(0) - d, err := mns.FindInode(ctx, mns.Root(), mns.Root(), f.name, &maxTraversals) - if err != nil { - t.Fatalf("failed to find %q: %v", f.name, err) - } - defer d.DecRef(ctx) - - f.File, err = d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("failed to open file %q readable: %v", f.name, err) - } - } - - return files -} diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD deleted file mode 100644 index e28a8961b..000000000 --- a/pkg/sentry/fs/dev/BUILD +++ /dev/null @@ -1,42 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "dev", - srcs = [ - "dev.go", - "device.go", - "fs.go", - "full.go", - "net_tun.go", - "null.go", - "random.go", - "tty.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/rand", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/pgalloc", - "//pkg/sentry/socket/netstack", - "//pkg/syserror", - "//pkg/tcpip/link/tun", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/dev/dev_state_autogen.go b/pkg/sentry/fs/dev/dev_state_autogen.go new file mode 100644 index 000000000..23945d391 --- /dev/null +++ b/pkg/sentry/fs/dev/dev_state_autogen.go @@ -0,0 +1,324 @@ +// automatically generated by stateify. + +package dev + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/dev.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +// +checklocksignore +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +// +checklocksignore +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (f *fullDevice) StateTypeName() string { + return "pkg/sentry/fs/dev.fullDevice" +} + +func (f *fullDevice) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (f *fullDevice) beforeSave() {} + +// +checklocksignore +func (f *fullDevice) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.InodeSimpleAttributes) +} + +func (f *fullDevice) afterLoad() {} + +// +checklocksignore +func (f *fullDevice) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.InodeSimpleAttributes) +} + +func (f *fullFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.fullFileOperations" +} + +func (f *fullFileOperations) StateFields() []string { + return []string{} +} + +func (f *fullFileOperations) beforeSave() {} + +// +checklocksignore +func (f *fullFileOperations) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *fullFileOperations) afterLoad() {} + +// +checklocksignore +func (f *fullFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func (n *netTunInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.netTunInodeOperations" +} + +func (n *netTunInodeOperations) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (n *netTunInodeOperations) beforeSave() {} + +// +checklocksignore +func (n *netTunInodeOperations) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.InodeSimpleAttributes) +} + +func (n *netTunInodeOperations) afterLoad() {} + +// +checklocksignore +func (n *netTunInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.InodeSimpleAttributes) +} + +func (n *netTunFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.netTunFileOperations" +} + +func (n *netTunFileOperations) StateFields() []string { + return []string{ + "device", + } +} + +func (n *netTunFileOperations) beforeSave() {} + +// +checklocksignore +func (n *netTunFileOperations) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.device) +} + +func (n *netTunFileOperations) afterLoad() {} + +// +checklocksignore +func (n *netTunFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.device) +} + +func (n *nullDevice) StateTypeName() string { + return "pkg/sentry/fs/dev.nullDevice" +} + +func (n *nullDevice) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (n *nullDevice) beforeSave() {} + +// +checklocksignore +func (n *nullDevice) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.InodeSimpleAttributes) +} + +func (n *nullDevice) afterLoad() {} + +// +checklocksignore +func (n *nullDevice) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.InodeSimpleAttributes) +} + +func (n *nullFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.nullFileOperations" +} + +func (n *nullFileOperations) StateFields() []string { + return []string{} +} + +func (n *nullFileOperations) beforeSave() {} + +// +checklocksignore +func (n *nullFileOperations) StateSave(stateSinkObject state.Sink) { + n.beforeSave() +} + +func (n *nullFileOperations) afterLoad() {} + +// +checklocksignore +func (n *nullFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func (zd *zeroDevice) StateTypeName() string { + return "pkg/sentry/fs/dev.zeroDevice" +} + +func (zd *zeroDevice) StateFields() []string { + return []string{ + "nullDevice", + } +} + +func (zd *zeroDevice) beforeSave() {} + +// +checklocksignore +func (zd *zeroDevice) StateSave(stateSinkObject state.Sink) { + zd.beforeSave() + stateSinkObject.Save(0, &zd.nullDevice) +} + +func (zd *zeroDevice) afterLoad() {} + +// +checklocksignore +func (zd *zeroDevice) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &zd.nullDevice) +} + +func (z *zeroFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.zeroFileOperations" +} + +func (z *zeroFileOperations) StateFields() []string { + return []string{} +} + +func (z *zeroFileOperations) beforeSave() {} + +// +checklocksignore +func (z *zeroFileOperations) StateSave(stateSinkObject state.Sink) { + z.beforeSave() +} + +func (z *zeroFileOperations) afterLoad() {} + +// +checklocksignore +func (z *zeroFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func (r *randomDevice) StateTypeName() string { + return "pkg/sentry/fs/dev.randomDevice" +} + +func (r *randomDevice) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (r *randomDevice) beforeSave() {} + +// +checklocksignore +func (r *randomDevice) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.InodeSimpleAttributes) +} + +func (r *randomDevice) afterLoad() {} + +// +checklocksignore +func (r *randomDevice) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.InodeSimpleAttributes) +} + +func (r *randomFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.randomFileOperations" +} + +func (r *randomFileOperations) StateFields() []string { + return []string{} +} + +func (r *randomFileOperations) beforeSave() {} + +// +checklocksignore +func (r *randomFileOperations) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *randomFileOperations) afterLoad() {} + +// +checklocksignore +func (r *randomFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func (t *ttyInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.ttyInodeOperations" +} + +func (t *ttyInodeOperations) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (t *ttyInodeOperations) beforeSave() {} + +// +checklocksignore +func (t *ttyInodeOperations) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.InodeSimpleAttributes) +} + +func (t *ttyInodeOperations) afterLoad() {} + +// +checklocksignore +func (t *ttyInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.InodeSimpleAttributes) +} + +func (t *ttyFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.ttyFileOperations" +} + +func (t *ttyFileOperations) StateFields() []string { + return []string{} +} + +func (t *ttyFileOperations) beforeSave() {} + +// +checklocksignore +func (t *ttyFileOperations) StateSave(stateSinkObject state.Sink) { + t.beforeSave() +} + +func (t *ttyFileOperations) afterLoad() {} + +// +checklocksignore +func (t *ttyFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*filesystem)(nil)) + state.Register((*fullDevice)(nil)) + state.Register((*fullFileOperations)(nil)) + state.Register((*netTunInodeOperations)(nil)) + state.Register((*netTunFileOperations)(nil)) + state.Register((*nullDevice)(nil)) + state.Register((*nullFileOperations)(nil)) + state.Register((*zeroDevice)(nil)) + state.Register((*zeroFileOperations)(nil)) + state.Register((*randomDevice)(nil)) + state.Register((*randomFileOperations)(nil)) + state.Register((*ttyInodeOperations)(nil)) + state.Register((*ttyFileOperations)(nil)) +} diff --git a/pkg/sentry/fs/dirent_cache_test.go b/pkg/sentry/fs/dirent_cache_test.go deleted file mode 100644 index 395c879f5..000000000 --- a/pkg/sentry/fs/dirent_cache_test.go +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "testing" -) - -func TestDirentCache(t *testing.T) { - const maxSize = 5 - - c := NewDirentCache(maxSize) - - // Size starts at 0. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Create a Dirent d. - d := NewNegativeDirent("") - - // c does not contain d. - if got, want := c.contains(d), false; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add d to the cache. - c.Add(d) - - // Size is now 1. - if got, want := c.Size(), uint64(1); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add maxSize-1 more elements. d should be oldest element. - for i := 0; i < maxSize-1; i++ { - c.Add(NewNegativeDirent("")) - } - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // "Bump" d to the front by re-adding it. - c.Add(d) - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add maxSize-1 more elements. d should again be oldest element. - for i := 0; i < maxSize-1; i++ { - c.Add(NewNegativeDirent("")) - } - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add one more element, which will bump d from the cache. - c.Add(NewNegativeDirent("")) - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c does not contain d. - if got, want := c.contains(d), false; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Invalidating causes size to be 0 and list to be empty. - c.Invalidate() - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - if got, want := c.list.Empty(), true; got != want { - t.Errorf("c.list.Empty() got %v, want %v", got, want) - } - - // Fill cache with maxSize dirents. - for i := 0; i < maxSize; i++ { - c.Add(NewNegativeDirent("")) - } -} - -func TestDirentCacheLimiter(t *testing.T) { - const ( - globalMaxSize = 5 - maxSize = 3 - ) - - limit := NewDirentCacheLimiter(globalMaxSize) - c1 := NewDirentCache(maxSize) - c1.limit = limit - c2 := NewDirentCache(maxSize) - c2.limit = limit - - // Create a Dirent d. - d := NewNegativeDirent("") - - // Add d to the cache. - c1.Add(d) - if got, want := c1.Size(), uint64(1); got != want { - t.Errorf("c1.Size() got %v, want %v", got, want) - } - - // Add maxSize-1 more elements. d should be oldest element. - for i := 0; i < maxSize-1; i++ { - c1.Add(NewNegativeDirent("")) - } - if got, want := c1.Size(), uint64(maxSize); got != want { - t.Errorf("c1.Size() got %v, want %v", got, want) - } - - // Check that d is still there. - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - - // Fill up the other cache, it will start dropping old entries from the cache - // when the global limit is reached. - for i := 0; i < maxSize; i++ { - c2.Add(NewNegativeDirent("")) - } - - // Check is what's remaining from global max. - if got, want := c2.Size(), globalMaxSize-maxSize; int(got) != want { - t.Errorf("c2.Size() got %v, want %v", got, want) - } - - // Check that d was not dropped. - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - - // Add an entry that will eventually be dropped. Check is done later... - drop := NewNegativeDirent("") - c1.Add(drop) - - // Check that d is bumped to front even when global limit is reached. - c1.Add(d) - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - - // Add 2 more element and check that: - // - d is still in the list: to verify that d was bumped - // - d2/d3 are in the list: older entries are dropped when global limit is - // reached. - // - drop is not in the list: indeed older elements are dropped. - d2 := NewNegativeDirent("") - c1.Add(d2) - d3 := NewNegativeDirent("") - c1.Add(d3) - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - if got, want := c1.contains(d2), true; got != want { - t.Errorf("c1.contains(d2) got %v want %v", got, want) - } - if got, want := c1.contains(d3), true; got != want { - t.Errorf("c1.contains(d3) got %v want %v", got, want) - } - if got, want := c1.contains(drop), false; got != want { - t.Errorf("c1.contains(drop) got %v want %v", got, want) - } - - // Drop all entries from one cache. The other will be allowed to grow. - c1.Invalidate() - c2.Add(NewNegativeDirent("")) - if got, want := c2.Size(), uint64(maxSize); got != want { - t.Errorf("c2.Size() got %v, want %v", got, want) - } -} - -// TestNilDirentCache tests that a nil cache supports all cache operations, but -// treats them as noop. -func TestNilDirentCache(t *testing.T) { - // Create a nil cache. - var c *DirentCache - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Call Add. - c.Add(NewNegativeDirent("")) - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Call Remove. - c.Remove(NewNegativeDirent("")) - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Call Invalidate. - c.Invalidate() - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } -} diff --git a/pkg/sentry/fs/dirent_list.go b/pkg/sentry/fs/dirent_list.go new file mode 100644 index 000000000..00dd39146 --- /dev/null +++ b/pkg/sentry/fs/dirent_list.go @@ -0,0 +1,221 @@ +package fs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type direntElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (direntElementMapper) linkerFor(elem *Dirent) *Dirent { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type direntList struct { + head *Dirent + tail *Dirent +} + +// Reset resets list l to the empty state. +func (l *direntList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *direntList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *direntList) Front() *Dirent { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *direntList) Back() *Dirent { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *direntList) Len() (count int) { + for e := l.Front(); e != nil; e = (direntElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *direntList) PushFront(e *Dirent) { + linker := direntElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + direntElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *direntList) PushBack(e *Dirent) { + linker := direntElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + direntElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *direntList) PushBackList(m *direntList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + direntElementMapper{}.linkerFor(l.tail).SetNext(m.head) + direntElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *direntList) InsertAfter(b, e *Dirent) { + bLinker := direntElementMapper{}.linkerFor(b) + eLinker := direntElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + direntElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *direntList) InsertBefore(a, e *Dirent) { + aLinker := direntElementMapper{}.linkerFor(a) + eLinker := direntElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + direntElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *direntList) Remove(e *Dirent) { + linker := direntElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + direntElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + direntElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type direntEntry struct { + next *Dirent + prev *Dirent +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *direntEntry) Next() *Dirent { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *direntEntry) Prev() *Dirent { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *direntEntry) SetNext(elem *Dirent) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *direntEntry) SetPrev(elem *Dirent) { + e.prev = elem +} diff --git a/pkg/sentry/fs/dirent_refs_test.go b/pkg/sentry/fs/dirent_refs_test.go deleted file mode 100644 index e2b66f357..000000000 --- a/pkg/sentry/fs/dirent_refs_test.go +++ /dev/null @@ -1,418 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "testing" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" -) - -func newMockDirInode(ctx context.Context, cache *DirentCache) *Inode { - return NewMockInode(ctx, NewMockMountSource(cache), StableAttr{Type: Directory}) -} - -func TestWalkPositive(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - root := NewDirent(ctx, newMockDirInode(ctx, nil), "root") - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - name := "d" - d, err := root.walk(ctx, root, name, false) - if err != nil { - t.Fatalf("root.walk(root, %q) got %v, want nil", name, err) - } - - if got := root.ReadRefs(); got != 2 { - t.Fatalf("root has a ref count of %d, want %d", got, 2) - } - - if got := d.ReadRefs(); got != 1 { - t.Fatalf("child name = %q has a ref count of %d, want %d", d.name, got, 1) - } - - d.DecRef(ctx) - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - if got := d.ReadRefs(); got != 0 { - t.Fatalf("child name = %q has a ref count of %d, want %d", d.name, got, 0) - } - - root.flush(ctx) - - if got := len(root.children); got != 0 { - t.Fatalf("root has %d children, want %d", got, 0) - } -} - -func TestWalkNegative(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - root := NewDirent(ctx, NewEmptyDir(ctx, nil), "root") - mn := root.Inode.InodeOperations.(*mockInodeOperationsLookupNegative) - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - name := "d" - for i := 0; i < 100; i++ { - _, err := root.walk(ctx, root, name, false) - if err != unix.ENOENT { - t.Fatalf("root.walk(root, %q) got %v, want %v", name, err, unix.ENOENT) - } - } - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - if got := len(root.children); got != 1 { - t.Fatalf("root has %d children, want %d", got, 1) - } - - w, ok := root.children[name] - if !ok { - t.Fatalf("root wants child at %q", name) - } - - child := w.Get() - if child == nil { - t.Fatalf("root wants to resolve weak reference") - } - - if !child.(*Dirent).IsNegative() { - t.Fatalf("root found positive child at %q, want negative", name) - } - - if got := child.(*Dirent).ReadRefs(); got != 2 { - t.Fatalf("child has a ref count of %d, want %d", got, 2) - } - - child.DecRef(ctx) - - if got := child.(*Dirent).ReadRefs(); got != 1 { - t.Fatalf("child has a ref count of %d, want %d", got, 1) - } - - if got := len(root.children); got != 1 { - t.Fatalf("root has %d children, want %d", got, 1) - } - - root.DecRef(ctx) - - if got := root.ReadRefs(); got != 0 { - t.Fatalf("root has a ref count of %d, want %d", got, 0) - } - - AsyncBarrier() - - if got := mn.releaseCalled; got != true { - t.Fatalf("root.Close was called %v, want true", got) - } -} - -type mockInodeOperationsLookupNegative struct { - *MockInodeOperations - releaseCalled bool -} - -func NewEmptyDir(ctx context.Context, cache *DirentCache) *Inode { - m := NewMockMountSource(cache) - return NewInode(ctx, &mockInodeOperationsLookupNegative{ - MockInodeOperations: NewMockInodeOperations(ctx), - }, m, StableAttr{Type: Directory}) -} - -func (m *mockInodeOperationsLookupNegative) Lookup(ctx context.Context, dir *Inode, p string) (*Dirent, error) { - return NewNegativeDirent(p), nil -} - -func (m *mockInodeOperationsLookupNegative) Release(context.Context) { - m.releaseCalled = true -} - -func TestHashNegativeToPositive(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - root := NewDirent(ctx, NewEmptyDir(ctx, nil), "root") - - name := "d" - _, err := root.walk(ctx, root, name, false) - if err != unix.ENOENT { - t.Fatalf("root.walk(root, %q) got %v, want %v", name, err, unix.ENOENT) - } - - if got := root.exists(ctx, root, name); got != false { - t.Fatalf("got %q exists, want does not exist", name) - } - - f, err := root.Create(ctx, root, name, FileFlags{}, FilePermissions{}) - if err != nil { - t.Fatalf("root.Create(%q, _), got error %v, want nil", name, err) - } - d := f.Dirent - - if d.IsNegative() { - t.Fatalf("got negative Dirent, want positive") - } - - if got := d.ReadRefs(); got != 1 { - t.Fatalf("child %q has a ref count of %d, want %d", name, got, 1) - } - - if got := root.ReadRefs(); got != 2 { - t.Fatalf("root has a ref count of %d, want %d", got, 2) - } - - if got := len(root.children); got != 1 { - t.Fatalf("got %d children, want %d", got, 1) - } - - w, ok := root.children[name] - if !ok { - t.Fatalf("failed to find weak reference to %q", name) - } - - child := w.Get() - if child == nil { - t.Fatalf("want to resolve weak reference") - } - - if child.(*Dirent) != d { - t.Fatalf("got foreign child") - } -} - -func TestRevalidate(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - for _, test := range []struct { - // desc is the test's description. - desc string - - // Whether to make negative Dirents. - makeNegative bool - }{ - { - desc: "Revalidate negative Dirent", - makeNegative: true, - }, - { - desc: "Revalidate positive Dirent", - makeNegative: false, - }, - } { - t.Run(test.desc, func(t *testing.T) { - ctx := contexttest.Context(t) - root := NewDirent(ctx, NewMockInodeRevalidate(ctx, test.makeNegative), "root") - - name := "d" - d1, err := root.walk(ctx, root, name, false) - if !test.makeNegative && err != nil { - t.Fatalf("root.walk(root, %q) got %v, want nil", name, err) - } - d2, err := root.walk(ctx, root, name, false) - if !test.makeNegative && err != nil { - t.Fatalf("root.walk(root, %q) got %v, want nil", name, err) - } - if !test.makeNegative && d1 == d2 { - t.Fatalf("revalidating walk got same *Dirent, want different") - } - if got := len(root.children); got != 1 { - t.Errorf("revalidating walk got %d children, want %d", got, 1) - } - }) - } -} - -type MockInodeOperationsRevalidate struct { - *MockInodeOperations - makeNegative bool -} - -func NewMockInodeRevalidate(ctx context.Context, makeNegative bool) *Inode { - mn := NewMockInodeOperations(ctx) - m := NewMockMountSource(nil) - m.MountSourceOperations.(*MockMountSourceOps).revalidate = true - return NewInode(ctx, &MockInodeOperationsRevalidate{MockInodeOperations: mn, makeNegative: makeNegative}, m, StableAttr{Type: Directory}) -} - -func (m *MockInodeOperationsRevalidate) Lookup(ctx context.Context, dir *Inode, p string) (*Dirent, error) { - if !m.makeNegative { - return m.MockInodeOperations.Lookup(ctx, dir, p) - } - return NewNegativeDirent(p), nil -} - -func TestCreateExtraRefs(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - for _, test := range []struct { - // desc is the test's description. - desc string - - // root is the Dirent to create from. - root *Dirent - - // expected references on walked Dirent. - refs int64 - }{ - { - desc: "Create caching", - root: NewDirent(ctx, NewEmptyDir(ctx, NewDirentCache(1)), "root"), - refs: 2, - }, - { - desc: "Create not caching", - root: NewDirent(ctx, NewEmptyDir(ctx, nil), "root"), - refs: 1, - }, - } { - t.Run(test.desc, func(t *testing.T) { - name := "d" - f, err := test.root.Create(ctx, test.root, name, FileFlags{}, FilePermissions{}) - if err != nil { - t.Fatalf("root.Create(root, %q) failed: %v", name, err) - } - d := f.Dirent - - if got := d.ReadRefs(); got != test.refs { - t.Errorf("dirent has a ref count of %d, want %d", got, test.refs) - } - }) - } -} - -func TestRemoveExtraRefs(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - for _, test := range []struct { - // desc is the test's description. - desc string - - // root is the Dirent to make and remove from. - root *Dirent - }{ - { - desc: "Remove caching", - root: NewDirent(ctx, NewEmptyDir(ctx, NewDirentCache(1)), "root"), - }, - { - desc: "Remove not caching", - root: NewDirent(ctx, NewEmptyDir(ctx, nil), "root"), - }, - } { - t.Run(test.desc, func(t *testing.T) { - name := "d" - f, err := test.root.Create(ctx, test.root, name, FileFlags{}, FilePermissions{}) - if err != nil { - t.Fatalf("root.Create(%q, _) failed: %v", name, err) - } - d := f.Dirent - - if err := test.root.Remove(contexttest.Context(t), test.root, name, false /* dirPath */); err != nil { - t.Fatalf("root.Remove(root, %q) failed: %v", name, err) - } - - if got := d.ReadRefs(); got != 1 { - t.Fatalf("dirent has a ref count of %d, want %d", got, 1) - } - - d.DecRef(ctx) - - test.root.flush(ctx) - - if got := len(test.root.children); got != 0 { - t.Errorf("root has %d children, want %d", got, 0) - } - }) - } -} - -func TestRenameExtraRefs(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - for _, test := range []struct { - // desc is the test's description. - desc string - - // cache of extra Dirent references, may be nil. - cache *DirentCache - }{ - { - desc: "Rename no caching", - cache: nil, - }, - { - desc: "Rename caching", - cache: NewDirentCache(5), - }, - } { - t.Run(test.desc, func(t *testing.T) { - ctx := contexttest.Context(t) - - dirAttr := StableAttr{Type: Directory} - - oldParent := NewDirent(ctx, NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "old_parent") - newParent := NewDirent(ctx, NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "new_parent") - - renamed, err := oldParent.Walk(ctx, oldParent, "old_child") - if err != nil { - t.Fatalf("Walk(oldParent, %q) got error %v, want nil", "old_child", err) - } - replaced, err := newParent.Walk(ctx, oldParent, "new_child") - if err != nil { - t.Fatalf("Walk(newParent, %q) got error %v, want nil", "new_child", err) - } - - if err := Rename(contexttest.RootContext(t), oldParent /*root */, oldParent, "old_child", newParent, "new_child"); err != nil { - t.Fatalf("Rename got error %v, want nil", err) - } - - oldParent.flush(ctx) - newParent.flush(ctx) - - // Expect to have only active references. - if got := renamed.ReadRefs(); got != 1 { - t.Errorf("renamed has ref count %d, want only active references %d", got, 1) - } - if got := replaced.ReadRefs(); got != 1 { - t.Errorf("replaced has ref count %d, want only active references %d", got, 1) - } - }) - } -} diff --git a/pkg/sentry/fs/event_list.go b/pkg/sentry/fs/event_list.go new file mode 100644 index 000000000..ceb97b703 --- /dev/null +++ b/pkg/sentry/fs/event_list.go @@ -0,0 +1,221 @@ +package fs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type eventElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (eventElementMapper) linkerFor(elem *Event) *Event { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type eventList struct { + head *Event + tail *Event +} + +// Reset resets list l to the empty state. +func (l *eventList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *eventList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *eventList) Front() *Event { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *eventList) Back() *Event { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *eventList) Len() (count int) { + for e := l.Front(); e != nil; e = (eventElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *eventList) PushFront(e *Event) { + linker := eventElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + eventElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *eventList) PushBack(e *Event) { + linker := eventElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + eventElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *eventList) PushBackList(m *eventList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + eventElementMapper{}.linkerFor(l.tail).SetNext(m.head) + eventElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *eventList) InsertAfter(b, e *Event) { + bLinker := eventElementMapper{}.linkerFor(b) + eLinker := eventElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + eventElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *eventList) InsertBefore(a, e *Event) { + aLinker := eventElementMapper{}.linkerFor(a) + eLinker := eventElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + eventElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *eventList) Remove(e *Event) { + linker := eventElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + eventElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + eventElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type eventEntry struct { + next *Event + prev *Event +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *eventEntry) Next() *Event { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *eventEntry) Prev() *Event { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *eventEntry) SetNext(elem *Event) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *eventEntry) SetPrev(elem *Event) { + e.prev = elem +} diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD deleted file mode 100644 index 5c889c861..000000000 --- a/pkg/sentry/fs/fdpipe/BUILD +++ /dev/null @@ -1,54 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "fdpipe", - srcs = [ - "pipe.go", - "pipe_opener.go", - "pipe_state.go", - ], - imports = ["gvisor.dev/gvisor/pkg/sentry/fs"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/log", - "//pkg/safemem", - "//pkg/secio", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "fdpipe_test", - size = "small", - srcs = [ - "pipe_opener_test.go", - "pipe_test.go", - ], - library = ":fdpipe", - deps = [ - "//pkg/context", - "//pkg/errors", - "//pkg/errors/linuxerr", - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/hostarch", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/syserror", - "//pkg/usermem", - "@com_github_google_uuid//:go_default_library", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/fs/fdpipe/fdpipe_state_autogen.go b/pkg/sentry/fs/fdpipe/fdpipe_state_autogen.go new file mode 100644 index 000000000..92dcd2391 --- /dev/null +++ b/pkg/sentry/fs/fdpipe/fdpipe_state_autogen.go @@ -0,0 +1,41 @@ +// automatically generated by stateify. + +package fdpipe + +import ( + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/state" +) + +func (p *pipeOperations) StateTypeName() string { + return "pkg/sentry/fs/fdpipe.pipeOperations" +} + +func (p *pipeOperations) StateFields() []string { + return []string{ + "flags", + "opener", + "readAheadBuffer", + } +} + +// +checklocksignore +func (p *pipeOperations) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + var flagsValue fs.FileFlags = p.saveFlags() + stateSinkObject.SaveValue(0, flagsValue) + stateSinkObject.Save(1, &p.opener) + stateSinkObject.Save(2, &p.readAheadBuffer) +} + +// +checklocksignore +func (p *pipeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(1, &p.opener) + stateSourceObject.Load(2, &p.readAheadBuffer) + stateSourceObject.LoadValue(0, new(fs.FileFlags), func(y interface{}) { p.loadFlags(y.(fs.FileFlags)) }) + stateSourceObject.AfterLoad(p.afterLoad) +} + +func init() { + state.Register((*pipeOperations)(nil)) +} diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go deleted file mode 100644 index 89d8be741..000000000 --- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go +++ /dev/null @@ -1,523 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fdpipe - -import ( - "bytes" - "fmt" - "io" - "os" - "path" - "testing" - "time" - - "github.com/google/uuid" - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -type hostOpener struct { - name string -} - -func (h *hostOpener) NonBlockingOpen(_ context.Context, p fs.PermMask) (*fd.FD, error) { - var flags int - switch { - case p.Read && p.Write: - flags = unix.O_RDWR - case p.Write: - flags = unix.O_WRONLY - case p.Read: - flags = unix.O_RDONLY - default: - return nil, unix.EINVAL - } - f, err := unix.Open(h.name, flags|unix.O_NONBLOCK, 0666) - if err != nil { - return nil, err - } - return fd.New(f), nil -} - -func pipename() string { - return fmt.Sprintf(path.Join(os.TempDir(), "test-named-pipe-%s"), uuid.New()) -} - -func mkpipe(name string) error { - return unix.Mknod(name, unix.S_IFIFO|0666, 0) -} - -func TestTryOpen(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // makePipe is true if the test case should create the pipe. - makePipe bool - - // flags are the fs.FileFlags used to open the pipe. - flags fs.FileFlags - - // expectFile is true if a fs.File is expected. - expectFile bool - - // err is the expected error - err error - }{ - { - desc: "FileFlags lacking Read and Write are invalid", - makePipe: false, - flags: fs.FileFlags{}, /* bogus */ - expectFile: false, - err: unix.EINVAL, - }, - { - desc: "NonBlocking Read only error returns immediately", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Read: true, NonBlocking: true}, - expectFile: false, - err: unix.ENOENT, - }, - { - desc: "NonBlocking Read only success returns immediately", - makePipe: true, - flags: fs.FileFlags{Read: true, NonBlocking: true}, - expectFile: true, - err: nil, - }, - { - desc: "NonBlocking Write only error returns immediately", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Write: true, NonBlocking: true}, - expectFile: false, - err: unix.ENOENT, - }, - { - desc: "NonBlocking Write only no reader error returns immediately", - makePipe: true, - flags: fs.FileFlags{Write: true, NonBlocking: true}, - expectFile: false, - err: unix.ENXIO, - }, - { - desc: "ReadWrite error returns immediately", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Read: true, Write: true}, - expectFile: false, - err: unix.ENOENT, - }, - { - desc: "ReadWrite returns immediately", - makePipe: true, - flags: fs.FileFlags{Read: true, Write: true}, - expectFile: true, - err: nil, - }, - { - desc: "Blocking Write only returns open error", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Write: true}, - expectFile: false, - err: unix.ENOENT, /* from bogus perms */ - }, - { - desc: "Blocking Read only returns open error", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Read: true}, - expectFile: false, - err: unix.ENOENT, - }, - { - desc: "Blocking Write only returns with syserror.ErrWouldBlock", - makePipe: true, - flags: fs.FileFlags{Write: true}, - expectFile: false, - err: syserror.ErrWouldBlock, - }, - { - desc: "Blocking Read only returns with syserror.ErrWouldBlock", - makePipe: true, - flags: fs.FileFlags{Read: true}, - expectFile: false, - err: syserror.ErrWouldBlock, - }, - } { - name := pipename() - if test.makePipe { - // Create the pipe. We do this per-test case to keep tests independent. - if err := mkpipe(name); err != nil { - t.Errorf("%s: failed to make host pipe: %v", test.desc, err) - continue - } - defer unix.Unlink(name) - } - - // Use a host opener to keep things simple. - opener := &hostOpener{name: name} - - pipeOpenState := &pipeOpenState{} - ctx := contexttest.Context(t) - pipeOps, err := pipeOpenState.TryOpen(ctx, opener, test.flags) - if unwrapError(err) != test.err { - t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) - if pipeOps != nil { - // Cleanup the state of the pipe, and remove the fd from the - // fdnotifier. Sadly this needed to maintain the correctness - // of other tests because the fdnotifier is global. - pipeOps.Release(ctx) - } - continue - } - if (pipeOps != nil) != test.expectFile { - t.Errorf("%s: got non-nil file %v, want %v", test.desc, pipeOps != nil, test.expectFile) - } - if pipeOps != nil { - // Same as above. - pipeOps.Release(ctx) - } - } -} - -func TestPipeOpenUnblocksEventually(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // partnerIsReader is true if the goroutine opening the same pipe as the test case - // should open the pipe read only. Otherwise write only. This also means that the - // test case will open the pipe in the opposite way. - partnerIsReader bool - - // partnerIsBlocking is true if the goroutine opening the same pipe as the test case - // should do so without the O_NONBLOCK flag, otherwise opens the pipe with O_NONBLOCK - // until ENXIO is not returned. - partnerIsBlocking bool - }{ - { - desc: "Blocking Read with blocking writer partner opens eventually", - partnerIsReader: false, - partnerIsBlocking: true, - }, - { - desc: "Blocking Write with blocking reader partner opens eventually", - partnerIsReader: true, - partnerIsBlocking: true, - }, - { - desc: "Blocking Read with non-blocking writer partner opens eventually", - partnerIsReader: false, - partnerIsBlocking: false, - }, - { - desc: "Blocking Write with non-blocking reader partner opens eventually", - partnerIsReader: true, - partnerIsBlocking: false, - }, - } { - // Create the pipe. We do this per-test case to keep tests independent. - name := pipename() - if err := mkpipe(name); err != nil { - t.Errorf("%s: failed to make host pipe: %v", test.desc, err) - continue - } - defer unix.Unlink(name) - - // Spawn the partner. - type fderr struct { - fd int - err error - } - errch := make(chan fderr, 1) - go func() { - var flags int - if test.partnerIsReader { - flags = unix.O_RDONLY - } else { - flags = unix.O_WRONLY - } - if test.partnerIsBlocking { - fd, err := unix.Open(name, flags, 0666) - errch <- fderr{fd: fd, err: err} - } else { - var fd int - err := error(unix.ENXIO) - for err == unix.ENXIO { - fd, err = unix.Open(name, flags|unix.O_NONBLOCK, 0666) - time.Sleep(1 * time.Second) - } - errch <- fderr{fd: fd, err: err} - } - }() - - // Setup file flags for either a read only or write only open. - flags := fs.FileFlags{ - Read: !test.partnerIsReader, - Write: test.partnerIsReader, - } - - // Open the pipe in a blocking way, which should succeed eventually. - opener := &hostOpener{name: name} - ctx := contexttest.Context(t) - pipeOps, err := Open(ctx, opener, flags) - if pipeOps != nil { - // Same as TestTryOpen. - pipeOps.Release(ctx) - } - - // Check that the partner opened the file successfully. - e := <-errch - if e.err != nil { - t.Errorf("%s: partner got error %v, wanted nil", test.desc, e.err) - continue - } - // If so, then close the partner fd to avoid leaking an fd. - unix.Close(e.fd) - - // Check that our blocking open was successful. - if err != nil { - t.Errorf("%s: blocking open got error %v, wanted nil", test.desc, err) - continue - } - if pipeOps == nil { - t.Errorf("%s: blocking open got nil file, wanted non-nil", test.desc) - continue - } - } -} - -func TestCopiedReadAheadBuffer(t *testing.T) { - // Create the pipe. - name := pipename() - if err := mkpipe(name); err != nil { - t.Fatalf("failed to make host pipe: %v", err) - } - defer unix.Unlink(name) - - // We're taking advantage of the fact that pipes opened read only always return - // success, but internally they are not deemed "opened" until we're sure that - // another writer comes along. This means we can open the same pipe write only - // with no problems + write to it, given that opener.Open already tried to open - // the pipe RDONLY and succeeded, which we know happened if TryOpen returns - // syserror.ErrwouldBlock. - // - // This simulates the open(RDONLY) <-> open(WRONLY)+write race we care about, but - // does not cause our test to be racy (which would be terrible). - opener := &hostOpener{name: name} - pipeOpenState := &pipeOpenState{} - ctx := contexttest.Context(t) - pipeOps, err := pipeOpenState.TryOpen(ctx, opener, fs.FileFlags{Read: true}) - if pipeOps != nil { - pipeOps.Release(ctx) - t.Fatalf("open(%s, %o) got file, want nil", name, unix.O_RDONLY) - } - if err != syserror.ErrWouldBlock { - t.Fatalf("open(%s, %o) got error %v, want %v", name, unix.O_RDONLY, err, syserror.ErrWouldBlock) - } - - // Then open the same pipe write only and write some bytes to it. The next - // time we try to open the pipe read only again via the pipeOpenState, we should - // succeed and buffer some of the bytes written. - fd, err := unix.Open(name, unix.O_WRONLY, 0666) - if err != nil { - t.Fatalf("open(%s, %o) got error %v, want nil", name, unix.O_WRONLY, err) - } - defer unix.Close(fd) - - data := []byte("hello") - if n, err := unix.Write(fd, data); n != len(data) || err != nil { - t.Fatalf("write(%v) got (%d, %v), want (%d, nil)", data, n, err, len(data)) - } - - // Try the read again, knowing that it should succeed this time. - pipeOps, err = pipeOpenState.TryOpen(ctx, opener, fs.FileFlags{Read: true}) - if pipeOps == nil { - t.Fatalf("open(%s, %o) got nil file, want not nil", name, unix.O_RDONLY) - } - defer pipeOps.Release(ctx) - - if err != nil { - t.Fatalf("open(%s, %o) got error %v, want nil", name, unix.O_RDONLY, err) - } - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, pipeOps) - - // Check that the file we opened points to a pipe with a non-empty read ahead buffer. - bufsize := len(pipeOps.readAheadBuffer) - if bufsize != 1 { - t.Fatalf("read ahead buffer got %d bytes, want %d", bufsize, 1) - } - - // Now for the final test, try to read everything in, expecting to get back all of - // the bytes that were written at once. Note that in the wild there is no atomic - // read size so expecting to get all bytes from a single writer when there are - // multiple readers is a bad expectation. - buf := make([]byte, len(data)) - ioseq := usermem.BytesIOSequence(buf) - n, err := pipeOps.Read(ctx, file, ioseq, 0) - if err != nil { - t.Fatalf("read request got error %v, want nil", err) - } - if n != int64(len(data)) { - t.Fatalf("read request got %d bytes, want %d", n, len(data)) - } - if !bytes.Equal(buf, data) { - t.Errorf("read request got bytes [%v], want [%v]", buf, data) - } -} - -func TestPipeHangup(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // flags control how we open our end of the pipe and must be read - // only or write only. They also dicate how a coordinating partner - // fd is opened, which is their inverse (read only -> write only, etc). - flags fs.FileFlags - - // hangupSelf if true causes the test case to close our end of the pipe - // and causes hangup errors to be asserted on our coordinating partner's - // fd. If hangupSelf is false, then our partner's fd is closed and the - // hangup errors are expected on our end of the pipe. - hangupSelf bool - }{ - { - desc: "Read only gets hangup error", - flags: fs.FileFlags{Read: true}, - }, - { - desc: "Write only gets hangup error", - flags: fs.FileFlags{Write: true}, - }, - { - desc: "Read only generates hangup error", - flags: fs.FileFlags{Read: true}, - hangupSelf: true, - }, - { - desc: "Write only generates hangup error", - flags: fs.FileFlags{Write: true}, - hangupSelf: true, - }, - } { - if test.flags.Read == test.flags.Write { - t.Errorf("%s: test requires a single reader or writer", test.desc) - continue - } - - // Create the pipe. We do this per-test case to keep tests independent. - name := pipename() - if err := mkpipe(name); err != nil { - t.Errorf("%s: failed to make host pipe: %v", test.desc, err) - continue - } - defer unix.Unlink(name) - - // Fire off a partner routine which tries to open the same pipe blocking, - // which will synchronize with us. The channel allows us to get back the - // fd once we expect this partner routine to succeed, so we can manifest - // hangup events more directly. - fdchan := make(chan int, 1) - go func() { - // Be explicit about the flags to protect the test from - // misconfiguration. - var flags int - if test.flags.Read { - flags = unix.O_WRONLY - } else { - flags = unix.O_RDONLY - } - fd, err := unix.Open(name, flags, 0666) - if err != nil { - t.Logf("Open(%q, %o, 0666) partner failed: %v", name, flags, err) - } - fdchan <- fd - }() - - // Open our end in a blocking way to ensure that we coordinate. - opener := &hostOpener{name: name} - ctx := contexttest.Context(t) - pipeOps, err := Open(ctx, opener, test.flags) - if err != nil { - t.Errorf("%s: Open got error %v, want nil", test.desc, err) - continue - } - // Don't defer file.DecRef here because that causes the hangup we're - // trying to test for. - - // Expect the partner routine to have coordinated with us and get back - // its open fd. - f := <-fdchan - if f < 0 { - t.Errorf("%s: partner routine got fd %d, want > 0", test.desc, f) - pipeOps.Release(ctx) - continue - } - - if test.hangupSelf { - // Hangup self and assert that our partner got the expected hangup - // error. - pipeOps.Release(ctx) - - if test.flags.Read { - // Partner is writer. - assertWriterHungup(t, test.desc, fd.NewReadWriter(f)) - } else { - // Partner is reader. - assertReaderHungup(t, test.desc, fd.NewReadWriter(f)) - } - } else { - // Hangup our partner and expect us to get the hangup error. - unix.Close(f) - defer pipeOps.Release(ctx) - - if test.flags.Read { - assertReaderHungup(t, test.desc, pipeOps.(*pipeOperations).file) - } else { - assertWriterHungup(t, test.desc, pipeOps.(*pipeOperations).file) - } - } - } -} - -func assertReaderHungup(t *testing.T, desc string, reader io.Reader) bool { - // Drain the pipe completely, it might have crap in it, but expect EOF eventually. - var err error - for err == nil { - _, err = reader.Read(make([]byte, 10)) - } - if err != io.EOF { - t.Errorf("%s: read from self after hangup got error %v, want %v", desc, err, io.EOF) - return false - } - return true -} - -func assertWriterHungup(t *testing.T, desc string, writer io.Writer) bool { - if _, err := writer.Write([]byte("hello")); !linuxerr.Equals(linuxerr.EPIPE, unwrapError(err)) { - t.Errorf("%s: write to self after hangup got error %v, want %v", desc, err, linuxerr.EPIPE) - return false - } - return true -} diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go deleted file mode 100644 index 4c8905a7e..000000000 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ /dev/null @@ -1,514 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fdpipe - -import ( - "bytes" - "io" - "os" - "testing" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/errors" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -func singlePipeFD() (int, error) { - fds := make([]int, 2) - if err := unix.Pipe(fds); err != nil { - return -1, err - } - unix.Close(fds[1]) - return fds[0], nil -} - -func singleDirFD() (int, error) { - return unix.Open(os.TempDir(), unix.O_RDONLY, 0666) -} - -func mockPipeDirent(t *testing.T) *fs.Dirent { - ctx := contexttest.Context(t) - node := fs.NewMockInodeOperations(ctx) - node.UAttr = fs.UnstableAttr{ - Perms: fs.FilePermissions{ - User: fs.PermMask{Read: true, Write: true}, - }, - } - inode := fs.NewInode(ctx, node, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - BlockSize: hostarch.PageSize, - }) - return fs.NewDirent(ctx, inode, "") -} - -func TestNewPipe(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // getfd generates the fd to pass to newPipeOperations. - getfd func() (int, error) - - // flags are the fs.FileFlags passed to newPipeOperations. - flags fs.FileFlags - - // readAheadBuffer is the buffer passed to newPipeOperations. - readAheadBuffer []byte - - // err is the expected error. - err error - }{ - { - desc: "Cannot make new pipe from bad fd", - getfd: func() (int, error) { return -1, nil }, - err: unix.EINVAL, - }, - { - desc: "Cannot make new pipe from non-pipe fd", - getfd: singleDirFD, - err: unix.EINVAL, - }, - { - desc: "Can make new pipe from pipe fd", - getfd: singlePipeFD, - flags: fs.FileFlags{Read: true}, - readAheadBuffer: []byte("hello"), - }, - } { - gfd, err := test.getfd() - if err != nil { - t.Errorf("%s: getfd got (%d, %v), want (fd, nil)", test.desc, gfd, err) - continue - } - f := fd.New(gfd) - - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, test.flags, f, test.readAheadBuffer) - if p != nil { - // This is necessary to remove the fd from the global fd notifier. - defer p.Release(ctx) - } else { - // If there is no p to DecRef on, because newPipeOperations failed, then the - // file still needs to be closed. - defer f.Close() - } - - if err != test.err { - t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) - continue - } - // Check the state of the pipe given that it was successfully opened. - if err == nil { - if p == nil { - t.Errorf("%s: got nil pipe and nil error, want (pipe, nil)", test.desc) - continue - } - if flags := p.flags; test.flags != flags { - t.Errorf("%s: got file flags %v, want %v", test.desc, flags, test.flags) - continue - } - if len(test.readAheadBuffer) != len(p.readAheadBuffer) { - t.Errorf("%s: got read ahead buffer length %d, want %d", test.desc, len(p.readAheadBuffer), len(test.readAheadBuffer)) - continue - } - fileFlags, _, errno := unix.Syscall(unix.SYS_FCNTL, uintptr(p.file.FD()), unix.F_GETFL, 0) - if errno != 0 { - t.Errorf("%s: failed to get file flags for fd %d, got %v, want 0", test.desc, p.file.FD(), errno) - continue - } - if fileFlags&unix.O_NONBLOCK == 0 { - t.Errorf("%s: pipe is blocking, expected non-blocking", test.desc) - continue - } - if !fdnotifier.HasFD(int32(f.FD())) { - t.Errorf("%s: pipe fd %d is not registered for events", test.desc, f.FD()) - } - } - } -} - -func TestPipeDestruction(t *testing.T) { - fds := make([]int, 2) - if err := unix.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - f := fd.New(fds[0]) - - // We don't care about the other end, just use the read end. - unix.Close(fds[1]) - - // Test the read end, but it doesn't really matter which. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Read: true}, f, nil) - if err != nil { - f.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - // Drop our only reference, which should trigger the destructor. - p.Release(ctx) - - if fdnotifier.HasFD(int32(fds[0])) { - t.Fatalf("after DecRef fdnotifier has fd %d, want no longer registered", fds[0]) - } - if p.file != nil { - t.Errorf("after DecRef got file, want nil") - } -} - -type Seek struct{} - -type ReadDir struct{} - -type Writev struct { - Src usermem.IOSequence -} - -type Readv struct { - Dst usermem.IOSequence -} - -type Fsync struct{} - -func TestPipeRequest(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // request to execute. - context interface{} - - // flags determines whether to use the read or write end - // of the pipe, for this test it can only be Read or Write. - flags fs.FileFlags - - // keepOpenPartner if false closes the other end of the pipe, - // otherwise this is delayed until the end of the test. - keepOpenPartner bool - - // expected error - err error - }{ - { - desc: "ReadDir on pipe returns ENOTDIR", - context: &ReadDir{}, - err: linuxerr.ENOTDIR, - }, - { - desc: "Fsync on pipe returns EINVAL", - context: &Fsync{}, - err: linuxerr.EINVAL, - }, - { - desc: "Seek on pipe returns ESPIPE", - context: &Seek{}, - err: linuxerr.ESPIPE, - }, - { - desc: "Readv on pipe from empty buffer returns nil", - context: &Readv{Dst: usermem.BytesIOSequence(nil)}, - flags: fs.FileFlags{Read: true}, - }, - { - desc: "Readv on pipe from non-empty buffer and closed partner returns EOF", - context: &Readv{Dst: usermem.BytesIOSequence(make([]byte, 10))}, - flags: fs.FileFlags{Read: true}, - err: io.EOF, - }, - { - desc: "Readv on pipe from non-empty buffer and open partner returns EWOULDBLOCK", - context: &Readv{Dst: usermem.BytesIOSequence(make([]byte, 10))}, - flags: fs.FileFlags{Read: true}, - keepOpenPartner: true, - err: syserror.ErrWouldBlock, - }, - { - desc: "Writev on pipe from empty buffer returns nil", - context: &Writev{Src: usermem.BytesIOSequence(nil)}, - flags: fs.FileFlags{Write: true}, - }, - { - desc: "Writev on pipe from non-empty buffer and closed partner returns EPIPE", - context: &Writev{Src: usermem.BytesIOSequence([]byte("hello"))}, - flags: fs.FileFlags{Write: true}, - err: linuxerr.EPIPE, - }, - { - desc: "Writev on pipe from non-empty buffer and open partner succeeds", - context: &Writev{Src: usermem.BytesIOSequence([]byte("hello"))}, - flags: fs.FileFlags{Write: true}, - keepOpenPartner: true, - }, - } { - if test.flags.Read && test.flags.Write { - panic("both read and write not supported for this test") - } - - fds := make([]int, 2) - if err := unix.Pipe(fds); err != nil { - t.Errorf("%s: failed to create pipes: got %v, want nil", test.desc, err) - continue - } - - // Configure the fd and partner fd based on the file flags. - testFd, partnerFd := fds[0], fds[1] - if test.flags.Write { - testFd, partnerFd = fds[1], fds[0] - } - - // Configure closing the fds. - if test.keepOpenPartner { - defer unix.Close(partnerFd) - } else { - unix.Close(partnerFd) - } - - // Create the pipe. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, test.flags, fd.New(testFd), nil) - if err != nil { - t.Fatalf("%s: newPipeOperations got error %v, want nil", test.desc, err) - } - defer p.Release(ctx) - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - // Issue request via the appropriate function. - switch c := test.context.(type) { - case *Seek: - _, err = p.Seek(ctx, file, 0, 0) - case *ReadDir: - _, err = p.Readdir(ctx, file, nil) - case *Readv: - _, err = p.Read(ctx, file, c.Dst, 0) - case *Writev: - _, err = p.Write(ctx, file, c.Src, 0) - case *Fsync: - err = p.Fsync(ctx, file, 0, fs.FileMaxOffset, fs.SyncAll) - default: - t.Errorf("%s: unknown request type %T", test.desc, test.context) - } - - if linuxErr, ok := test.err.(*errors.Error); ok { - if !linuxerr.Equals(linuxErr, unwrapError(err)) { - t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) - } - } else if test.err != unwrapError(err) { - t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) - } - } -} - -func TestPipeReadAheadBuffer(t *testing.T) { - fds := make([]int, 2) - if err := unix.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - rfile := fd.New(fds[0]) - - // Eventually close the write end, which is not wrapped in a pipe object. - defer unix.Close(fds[1]) - - // Write some bytes to this end. - data := []byte("world") - if n, err := unix.Write(fds[1], data); n != len(data) || err != nil { - rfile.Close() - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(data)) - } - // Close the write end immediately, we don't care about it. - - buffered := []byte("hello ") - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Read: true}, rfile, buffered) - if err != nil { - rfile.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - defer p.Release(ctx) - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - // In total we expect to read data + buffered. - total := append(buffered, data...) - - buf := make([]byte, len(total)) - iov := usermem.BytesIOSequence(buf) - n, err := p.Read(contexttest.Context(t), file, iov, 0) - if err != nil { - t.Fatalf("read request got error %v, want nil", err) - } - if n != int64(len(total)) { - t.Fatalf("read request got %d bytes, want %d", n, len(total)) - } - if !bytes.Equal(buf, total) { - t.Errorf("read request got bytes [%v], want [%v]", buf, total) - } -} - -// This is very important for pipes in general because they can return -// EWOULDBLOCK and for those that block they must continue until they have read -// all of the data (and report it as such). -func TestPipeReadsAccumulate(t *testing.T) { - fds := make([]int, 2) - if err := unix.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - rfile := fd.New(fds[0]) - - // Eventually close the write end, it doesn't depend on a pipe object. - defer unix.Close(fds[1]) - - // Get a new read only pipe reference. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Read: true}, rfile, nil) - if err != nil { - rfile.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - // Don't forget to remove the fd from the fd notifier. Otherwise other tests will - // likely be borked, because it's global :( - defer p.Release(ctx) - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - // Write some some bytes to the pipe. - data := []byte("some message") - if n, err := unix.Write(fds[1], data); n != len(data) || err != nil { - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(data)) - } - - // Construct a segment vec that is a bit more than we have written so we - // trigger an EWOULDBLOCK. - wantBytes := len(data) + 1 - readBuffer := make([]byte, wantBytes) - iov := usermem.BytesIOSequence(readBuffer) - n, err := p.Read(ctx, file, iov, 0) - total := n - iov = iov.DropFirst64(n) - if err != syserror.ErrWouldBlock { - t.Fatalf("Readv got error %v, want %v", err, syserror.ErrWouldBlock) - } - - // Write a few more bytes to allow us to read more/accumulate. - extra := []byte("extra") - if n, err := unix.Write(fds[1], extra); n != len(extra) || err != nil { - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(extra)) - } - - // This time, using the same request, we should not block. - n, err = p.Read(ctx, file, iov, 0) - total += n - if err != nil { - t.Fatalf("Readv got error %v, want nil", err) - } - - // Assert that the result we got back is cumulative. - if total != int64(wantBytes) { - t.Fatalf("Readv sequence got %d bytes, want %d", total, wantBytes) - } - - if want := append(data, extra[0]); !bytes.Equal(readBuffer, want) { - t.Errorf("Readv sequence got %v, want %v", readBuffer, want) - } -} - -// Same as TestReadsAccumulate. -func TestPipeWritesAccumulate(t *testing.T) { - fds := make([]int, 2) - if err := unix.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - wfile := fd.New(fds[1]) - - // Eventually close the read end, it doesn't depend on a pipe object. - defer unix.Close(fds[0]) - - // Get a new write only pipe reference. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Write: true}, wfile, nil) - if err != nil { - wfile.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - // Don't forget to remove the fd from the fd notifier. Otherwise other tests - // will likely be borked, because it's global :( - defer p.Release(ctx) - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - pipeSize, _, errno := unix.Syscall(unix.SYS_FCNTL, uintptr(wfile.FD()), unix.F_GETPIPE_SZ, 0) - if errno != 0 { - t.Fatalf("fcntl(F_GETPIPE_SZ) failed: %v", errno) - } - t.Logf("Pipe buffer size: %d", pipeSize) - - // Construct a segment vec that is larger than the pipe size to trigger an - // EWOULDBLOCK. - wantBytes := int(pipeSize) * 2 - writeBuffer := make([]byte, wantBytes) - for i := 0; i < wantBytes; i++ { - writeBuffer[i] = 'a' - } - iov := usermem.BytesIOSequence(writeBuffer) - n, err := p.Write(ctx, file, iov, 0) - if err != syserror.ErrWouldBlock { - t.Fatalf("Writev got error %v, want %v", err, syserror.ErrWouldBlock) - } - if n != int64(pipeSize) { - t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize) - } - total := n - iov = iov.DropFirst64(n) - - // Read the entire pipe buf size to make space for the second half. - readBuffer := make([]byte, n) - if n, err := unix.Read(fds[0], readBuffer); n != len(readBuffer) || err != nil { - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(readBuffer)) - } - if !bytes.Equal(readBuffer, writeBuffer[:len(readBuffer)]) { - t.Fatalf("wrong data read from pipe, got: %v, want: %v", readBuffer, writeBuffer) - } - - // This time we should not block. - n, err = p.Write(ctx, file, iov, 0) - if err != nil { - t.Fatalf("Writev got error %v, want nil", err) - } - if n != int64(pipeSize) { - t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize) - } - total += n - - // Assert that the result we got back is cumulative. - if total != int64(wantBytes) { - t.Fatalf("Writev sequence got %d bytes, want %d", total, wantBytes) - } -} diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go deleted file mode 100644 index 1971cc680..000000000 --- a/pkg/sentry/fs/file_overlay_test.go +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" -) - -func TestReaddir(t *testing.T) { - ctx := contexttest.Context(t) - ctx = &rootContext{ - Context: ctx, - root: fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root"), - } - for _, test := range []struct { - // Test description. - desc string - - // Lookup parameters. - dir *fs.Inode - - // Want from lookup. - err error - names []string - }{ - { - desc: "no upper, lower has entries", - dir: fs.NewTestOverlayDir(ctx, - nil, /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - {name: "b"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "b"}, - }, - { - desc: "upper has entries, no lower", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - {name: "b"}, - }, nil), /* upper */ - nil, /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "b"}, - }, - { - desc: "upper and lower, entries combine", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "b"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "b"}, - }, - { - desc: "upper and lower, entries combine, none are masked", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - }, []string{"b"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "c"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "c"}, - }, - { - desc: "upper and lower, entries combine, upper masks some of lower", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - }, []string{"b"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "b"}, /* will be masked */ - {name: "c"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "c"}, - }, - } { - t.Run(test.desc, func(t *testing.T) { - openDir, err := test.dir.GetFile(ctx, fs.NewDirent(ctx, test.dir, "stub"), fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("GetFile got error %v, want nil", err) - } - stubSerializer := &fs.CollectEntriesSerializer{} - err = openDir.Readdir(ctx, stubSerializer) - if err != test.err { - t.Fatalf("Readdir got error %v, want nil", err) - } - if err != nil { - return - } - if !reflect.DeepEqual(stubSerializer.Order, test.names) { - t.Errorf("Readdir got names %v, want %v", stubSerializer.Order, test.names) - } - }) - } -} - -func TestReaddirRevalidation(t *testing.T) { - ctx := contexttest.Context(t) - ctx = &rootContext{ - Context: ctx, - root: fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root"), - } - - // Create an overlay with two directories, each with one file. - upper := newTestRamfsDir(ctx, []dirContent{{name: "a"}}, nil) - lower := newTestRamfsDir(ctx, []dirContent{{name: "b"}}, nil) - overlay := fs.NewTestOverlayDir(ctx, upper, lower, true /* revalidate */) - - // Get a handle to the dirent in the upper filesystem so that we can - // modify it without going through the dirent. - upperDir := upper.InodeOperations.(*dir).InodeOperations.(*ramfs.Dir) - - // Check that overlay returns the files from both upper and lower. - openDir, err := overlay.GetFile(ctx, fs.NewDirent(ctx, overlay, "stub"), fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("GetFile got error %v, want nil", err) - } - ser := &fs.CollectEntriesSerializer{} - if err := openDir.Readdir(ctx, ser); err != nil { - t.Fatalf("Readdir got error %v, want nil", err) - } - got, want := ser.Order, []string{".", "..", "a", "b"} - if !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } - - // Remove "a" from the upper and add "c". - if err := upperDir.Remove(ctx, upper, "a"); err != nil { - t.Fatalf("error removing child: %v", err) - } - upperDir.AddChild(ctx, "c", fs.NewInode(ctx, fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermissions{}, 0), - upper.MountSource, fs.StableAttr{Type: fs.RegularFile})) - - // Seek to beginning of the directory and do the readdir again. - if _, err := openDir.Seek(ctx, fs.SeekSet, 0); err != nil { - t.Fatalf("error seeking to beginning of dir: %v", err) - } - ser = &fs.CollectEntriesSerializer{} - if err := openDir.Readdir(ctx, ser); err != nil { - t.Fatalf("Readdir got error %v, want nil", err) - } - - // Readdir should return the updated children. - got, want = ser.Order, []string{".", "..", "b", "c"} - if !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } -} - -type rootContext struct { - context.Context - root *fs.Dirent -} - -// Value implements context.Context. -func (r *rootContext) Value(key interface{}) interface{} { - switch key { - case fs.CtxRoot: - r.root.IncRef() - return r.root - default: - return r.Context.Value(key) - } -} diff --git a/pkg/sentry/fs/filetest/BUILD b/pkg/sentry/fs/filetest/BUILD deleted file mode 100644 index a8000e010..000000000 --- a/pkg/sentry/fs/filetest/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "filetest", - testonly = 1, - srcs = ["filetest.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/filetest/filetest.go b/pkg/sentry/fs/filetest/filetest.go deleted file mode 100644 index ec3d3f96c..000000000 --- a/pkg/sentry/fs/filetest/filetest.go +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package filetest provides a test implementation of an fs.File. -package filetest - -import ( - "fmt" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/anon" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -// TestFileOperations is an implementation of the File interface. It provides all -// required methods. -type TestFileOperations struct { - fsutil.FileNoopRelease `state:"nosave"` - fsutil.FilePipeSeek `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileNoFsync `state:"nosave"` - fsutil.FileNoopFlush `state:"nosave"` - fsutil.FileNoMMap `state:"nosave"` - fsutil.FileNoIoctl `state:"nosave"` - fsutil.FileNoSplice `state:"nosave"` - fsutil.FileUseInodeUnstableAttr `state:"nosave"` - waiter.AlwaysReady `state:"nosave"` -} - -// NewTestFile creates and initializes a new test file. -func NewTestFile(tb testing.TB) *fs.File { - ctx := contexttest.Context(tb) - dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "test") - return fs.NewFile(ctx, dirent, fs.FileFlags{}, &TestFileOperations{}) -} - -// Read just fails the request. -func (*TestFileOperations) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, fmt.Errorf("TestFileOperations.Read not implemented") -} - -// Write just fails the request. -func (*TestFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, fmt.Errorf("TestFileOperations.Write not implemented") -} diff --git a/pkg/sentry/fs/fs_state_autogen.go b/pkg/sentry/fs/fs_state_autogen.go new file mode 100644 index 000000000..a4a67fca2 --- /dev/null +++ b/pkg/sentry/fs/fs_state_autogen.go @@ -0,0 +1,1207 @@ +// automatically generated by stateify. + +package fs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *StableAttr) StateTypeName() string { + return "pkg/sentry/fs.StableAttr" +} + +func (s *StableAttr) StateFields() []string { + return []string{ + "Type", + "DeviceID", + "InodeID", + "BlockSize", + "DeviceFileMajor", + "DeviceFileMinor", + } +} + +func (s *StableAttr) beforeSave() {} + +// +checklocksignore +func (s *StableAttr) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Type) + stateSinkObject.Save(1, &s.DeviceID) + stateSinkObject.Save(2, &s.InodeID) + stateSinkObject.Save(3, &s.BlockSize) + stateSinkObject.Save(4, &s.DeviceFileMajor) + stateSinkObject.Save(5, &s.DeviceFileMinor) +} + +func (s *StableAttr) afterLoad() {} + +// +checklocksignore +func (s *StableAttr) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Type) + stateSourceObject.Load(1, &s.DeviceID) + stateSourceObject.Load(2, &s.InodeID) + stateSourceObject.Load(3, &s.BlockSize) + stateSourceObject.Load(4, &s.DeviceFileMajor) + stateSourceObject.Load(5, &s.DeviceFileMinor) +} + +func (ua *UnstableAttr) StateTypeName() string { + return "pkg/sentry/fs.UnstableAttr" +} + +func (ua *UnstableAttr) StateFields() []string { + return []string{ + "Size", + "Usage", + "Perms", + "Owner", + "AccessTime", + "ModificationTime", + "StatusChangeTime", + "Links", + } +} + +func (ua *UnstableAttr) beforeSave() {} + +// +checklocksignore +func (ua *UnstableAttr) StateSave(stateSinkObject state.Sink) { + ua.beforeSave() + stateSinkObject.Save(0, &ua.Size) + stateSinkObject.Save(1, &ua.Usage) + stateSinkObject.Save(2, &ua.Perms) + stateSinkObject.Save(3, &ua.Owner) + stateSinkObject.Save(4, &ua.AccessTime) + stateSinkObject.Save(5, &ua.ModificationTime) + stateSinkObject.Save(6, &ua.StatusChangeTime) + stateSinkObject.Save(7, &ua.Links) +} + +func (ua *UnstableAttr) afterLoad() {} + +// +checklocksignore +func (ua *UnstableAttr) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ua.Size) + stateSourceObject.Load(1, &ua.Usage) + stateSourceObject.Load(2, &ua.Perms) + stateSourceObject.Load(3, &ua.Owner) + stateSourceObject.Load(4, &ua.AccessTime) + stateSourceObject.Load(5, &ua.ModificationTime) + stateSourceObject.Load(6, &ua.StatusChangeTime) + stateSourceObject.Load(7, &ua.Links) +} + +func (a *AttrMask) StateTypeName() string { + return "pkg/sentry/fs.AttrMask" +} + +func (a *AttrMask) StateFields() []string { + return []string{ + "Type", + "DeviceID", + "InodeID", + "BlockSize", + "Size", + "Usage", + "Perms", + "UID", + "GID", + "AccessTime", + "ModificationTime", + "StatusChangeTime", + "Links", + } +} + +func (a *AttrMask) beforeSave() {} + +// +checklocksignore +func (a *AttrMask) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.Type) + stateSinkObject.Save(1, &a.DeviceID) + stateSinkObject.Save(2, &a.InodeID) + stateSinkObject.Save(3, &a.BlockSize) + stateSinkObject.Save(4, &a.Size) + stateSinkObject.Save(5, &a.Usage) + stateSinkObject.Save(6, &a.Perms) + stateSinkObject.Save(7, &a.UID) + stateSinkObject.Save(8, &a.GID) + stateSinkObject.Save(9, &a.AccessTime) + stateSinkObject.Save(10, &a.ModificationTime) + stateSinkObject.Save(11, &a.StatusChangeTime) + stateSinkObject.Save(12, &a.Links) +} + +func (a *AttrMask) afterLoad() {} + +// +checklocksignore +func (a *AttrMask) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.Type) + stateSourceObject.Load(1, &a.DeviceID) + stateSourceObject.Load(2, &a.InodeID) + stateSourceObject.Load(3, &a.BlockSize) + stateSourceObject.Load(4, &a.Size) + stateSourceObject.Load(5, &a.Usage) + stateSourceObject.Load(6, &a.Perms) + stateSourceObject.Load(7, &a.UID) + stateSourceObject.Load(8, &a.GID) + stateSourceObject.Load(9, &a.AccessTime) + stateSourceObject.Load(10, &a.ModificationTime) + stateSourceObject.Load(11, &a.StatusChangeTime) + stateSourceObject.Load(12, &a.Links) +} + +func (p *PermMask) StateTypeName() string { + return "pkg/sentry/fs.PermMask" +} + +func (p *PermMask) StateFields() []string { + return []string{ + "Read", + "Write", + "Execute", + } +} + +func (p *PermMask) beforeSave() {} + +// +checklocksignore +func (p *PermMask) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.Read) + stateSinkObject.Save(1, &p.Write) + stateSinkObject.Save(2, &p.Execute) +} + +func (p *PermMask) afterLoad() {} + +// +checklocksignore +func (p *PermMask) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.Read) + stateSourceObject.Load(1, &p.Write) + stateSourceObject.Load(2, &p.Execute) +} + +func (f *FilePermissions) StateTypeName() string { + return "pkg/sentry/fs.FilePermissions" +} + +func (f *FilePermissions) StateFields() []string { + return []string{ + "User", + "Group", + "Other", + "Sticky", + "SetUID", + "SetGID", + } +} + +func (f *FilePermissions) beforeSave() {} + +// +checklocksignore +func (f *FilePermissions) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.User) + stateSinkObject.Save(1, &f.Group) + stateSinkObject.Save(2, &f.Other) + stateSinkObject.Save(3, &f.Sticky) + stateSinkObject.Save(4, &f.SetUID) + stateSinkObject.Save(5, &f.SetGID) +} + +func (f *FilePermissions) afterLoad() {} + +// +checklocksignore +func (f *FilePermissions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.User) + stateSourceObject.Load(1, &f.Group) + stateSourceObject.Load(2, &f.Other) + stateSourceObject.Load(3, &f.Sticky) + stateSourceObject.Load(4, &f.SetUID) + stateSourceObject.Load(5, &f.SetGID) +} + +func (f *FileOwner) StateTypeName() string { + return "pkg/sentry/fs.FileOwner" +} + +func (f *FileOwner) StateFields() []string { + return []string{ + "UID", + "GID", + } +} + +func (f *FileOwner) beforeSave() {} + +// +checklocksignore +func (f *FileOwner) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.UID) + stateSinkObject.Save(1, &f.GID) +} + +func (f *FileOwner) afterLoad() {} + +// +checklocksignore +func (f *FileOwner) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.UID) + stateSourceObject.Load(1, &f.GID) +} + +func (d *DentAttr) StateTypeName() string { + return "pkg/sentry/fs.DentAttr" +} + +func (d *DentAttr) StateFields() []string { + return []string{ + "Type", + "InodeID", + } +} + +func (d *DentAttr) beforeSave() {} + +// +checklocksignore +func (d *DentAttr) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.Type) + stateSinkObject.Save(1, &d.InodeID) +} + +func (d *DentAttr) afterLoad() {} + +// +checklocksignore +func (d *DentAttr) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.Type) + stateSourceObject.Load(1, &d.InodeID) +} + +func (s *SortedDentryMap) StateTypeName() string { + return "pkg/sentry/fs.SortedDentryMap" +} + +func (s *SortedDentryMap) StateFields() []string { + return []string{ + "names", + "entries", + } +} + +func (s *SortedDentryMap) beforeSave() {} + +// +checklocksignore +func (s *SortedDentryMap) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.names) + stateSinkObject.Save(1, &s.entries) +} + +func (s *SortedDentryMap) afterLoad() {} + +// +checklocksignore +func (s *SortedDentryMap) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.names) + stateSourceObject.Load(1, &s.entries) +} + +func (d *Dirent) StateTypeName() string { + return "pkg/sentry/fs.Dirent" +} + +func (d *Dirent) StateFields() []string { + return []string{ + "AtomicRefCount", + "userVisible", + "Inode", + "name", + "parent", + "deleted", + "mounted", + "children", + } +} + +// +checklocksignore +func (d *Dirent) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + var childrenValue map[string]*Dirent = d.saveChildren() + stateSinkObject.SaveValue(7, childrenValue) + stateSinkObject.Save(0, &d.AtomicRefCount) + stateSinkObject.Save(1, &d.userVisible) + stateSinkObject.Save(2, &d.Inode) + stateSinkObject.Save(3, &d.name) + stateSinkObject.Save(4, &d.parent) + stateSinkObject.Save(5, &d.deleted) + stateSinkObject.Save(6, &d.mounted) +} + +// +checklocksignore +func (d *Dirent) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.AtomicRefCount) + stateSourceObject.Load(1, &d.userVisible) + stateSourceObject.Load(2, &d.Inode) + stateSourceObject.Load(3, &d.name) + stateSourceObject.Load(4, &d.parent) + stateSourceObject.Load(5, &d.deleted) + stateSourceObject.Load(6, &d.mounted) + stateSourceObject.LoadValue(7, new(map[string]*Dirent), func(y interface{}) { d.loadChildren(y.(map[string]*Dirent)) }) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (c *DirentCache) StateTypeName() string { + return "pkg/sentry/fs.DirentCache" +} + +func (c *DirentCache) StateFields() []string { + return []string{ + "maxSize", + "limit", + } +} + +func (c *DirentCache) beforeSave() {} + +// +checklocksignore +func (c *DirentCache) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + if !state.IsZeroValue(&c.currentSize) { + state.Failf("currentSize is %#v, expected zero", &c.currentSize) + } + if !state.IsZeroValue(&c.list) { + state.Failf("list is %#v, expected zero", &c.list) + } + stateSinkObject.Save(0, &c.maxSize) + stateSinkObject.Save(1, &c.limit) +} + +func (c *DirentCache) afterLoad() {} + +// +checklocksignore +func (c *DirentCache) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.maxSize) + stateSourceObject.Load(1, &c.limit) +} + +func (d *DirentCacheLimiter) StateTypeName() string { + return "pkg/sentry/fs.DirentCacheLimiter" +} + +func (d *DirentCacheLimiter) StateFields() []string { + return []string{ + "max", + } +} + +func (d *DirentCacheLimiter) beforeSave() {} + +// +checklocksignore +func (d *DirentCacheLimiter) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + if !state.IsZeroValue(&d.count) { + state.Failf("count is %#v, expected zero", &d.count) + } + stateSinkObject.Save(0, &d.max) +} + +func (d *DirentCacheLimiter) afterLoad() {} + +// +checklocksignore +func (d *DirentCacheLimiter) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.max) +} + +func (l *direntList) StateTypeName() string { + return "pkg/sentry/fs.direntList" +} + +func (l *direntList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *direntList) beforeSave() {} + +// +checklocksignore +func (l *direntList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *direntList) afterLoad() {} + +// +checklocksignore +func (l *direntList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *direntEntry) StateTypeName() string { + return "pkg/sentry/fs.direntEntry" +} + +func (e *direntEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *direntEntry) beforeSave() {} + +// +checklocksignore +func (e *direntEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *direntEntry) afterLoad() {} + +// +checklocksignore +func (e *direntEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (l *eventList) StateTypeName() string { + return "pkg/sentry/fs.eventList" +} + +func (l *eventList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *eventList) beforeSave() {} + +// +checklocksignore +func (l *eventList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *eventList) afterLoad() {} + +// +checklocksignore +func (l *eventList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *eventEntry) StateTypeName() string { + return "pkg/sentry/fs.eventEntry" +} + +func (e *eventEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *eventEntry) beforeSave() {} + +// +checklocksignore +func (e *eventEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *eventEntry) afterLoad() {} + +// +checklocksignore +func (e *eventEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (f *File) StateTypeName() string { + return "pkg/sentry/fs.File" +} + +func (f *File) StateFields() []string { + return []string{ + "AtomicRefCount", + "UniqueID", + "Dirent", + "flags", + "async", + "FileOperations", + "offset", + } +} + +// +checklocksignore +func (f *File) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.AtomicRefCount) + stateSinkObject.Save(1, &f.UniqueID) + stateSinkObject.Save(2, &f.Dirent) + stateSinkObject.Save(3, &f.flags) + stateSinkObject.Save(4, &f.async) + stateSinkObject.Save(5, &f.FileOperations) + stateSinkObject.Save(6, &f.offset) +} + +// +checklocksignore +func (f *File) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.AtomicRefCount) + stateSourceObject.Load(1, &f.UniqueID) + stateSourceObject.Load(2, &f.Dirent) + stateSourceObject.Load(3, &f.flags) + stateSourceObject.Load(4, &f.async) + stateSourceObject.LoadWait(5, &f.FileOperations) + stateSourceObject.Load(6, &f.offset) + stateSourceObject.AfterLoad(f.afterLoad) +} + +func (f *overlayFileOperations) StateTypeName() string { + return "pkg/sentry/fs.overlayFileOperations" +} + +func (f *overlayFileOperations) StateFields() []string { + return []string{ + "upper", + "lower", + "dirCursor", + } +} + +func (f *overlayFileOperations) beforeSave() {} + +// +checklocksignore +func (f *overlayFileOperations) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.upper) + stateSinkObject.Save(1, &f.lower) + stateSinkObject.Save(2, &f.dirCursor) +} + +func (f *overlayFileOperations) afterLoad() {} + +// +checklocksignore +func (f *overlayFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.upper) + stateSourceObject.Load(1, &f.lower) + stateSourceObject.Load(2, &f.dirCursor) +} + +func (omi *overlayMappingIdentity) StateTypeName() string { + return "pkg/sentry/fs.overlayMappingIdentity" +} + +func (omi *overlayMappingIdentity) StateFields() []string { + return []string{ + "AtomicRefCount", + "id", + "overlayFile", + } +} + +func (omi *overlayMappingIdentity) beforeSave() {} + +// +checklocksignore +func (omi *overlayMappingIdentity) StateSave(stateSinkObject state.Sink) { + omi.beforeSave() + stateSinkObject.Save(0, &omi.AtomicRefCount) + stateSinkObject.Save(1, &omi.id) + stateSinkObject.Save(2, &omi.overlayFile) +} + +func (omi *overlayMappingIdentity) afterLoad() {} + +// +checklocksignore +func (omi *overlayMappingIdentity) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &omi.AtomicRefCount) + stateSourceObject.Load(1, &omi.id) + stateSourceObject.Load(2, &omi.overlayFile) +} + +func (m *MountSourceFlags) StateTypeName() string { + return "pkg/sentry/fs.MountSourceFlags" +} + +func (m *MountSourceFlags) StateFields() []string { + return []string{ + "ReadOnly", + "NoAtime", + "ForcePageCache", + "NoExec", + } +} + +func (m *MountSourceFlags) beforeSave() {} + +// +checklocksignore +func (m *MountSourceFlags) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.ReadOnly) + stateSinkObject.Save(1, &m.NoAtime) + stateSinkObject.Save(2, &m.ForcePageCache) + stateSinkObject.Save(3, &m.NoExec) +} + +func (m *MountSourceFlags) afterLoad() {} + +// +checklocksignore +func (m *MountSourceFlags) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.ReadOnly) + stateSourceObject.Load(1, &m.NoAtime) + stateSourceObject.Load(2, &m.ForcePageCache) + stateSourceObject.Load(3, &m.NoExec) +} + +func (f *FileFlags) StateTypeName() string { + return "pkg/sentry/fs.FileFlags" +} + +func (f *FileFlags) StateFields() []string { + return []string{ + "Direct", + "NonBlocking", + "DSync", + "Sync", + "Append", + "Read", + "Write", + "Pread", + "Pwrite", + "Directory", + "Async", + "LargeFile", + "NonSeekable", + "Truncate", + } +} + +func (f *FileFlags) beforeSave() {} + +// +checklocksignore +func (f *FileFlags) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.Direct) + stateSinkObject.Save(1, &f.NonBlocking) + stateSinkObject.Save(2, &f.DSync) + stateSinkObject.Save(3, &f.Sync) + stateSinkObject.Save(4, &f.Append) + stateSinkObject.Save(5, &f.Read) + stateSinkObject.Save(6, &f.Write) + stateSinkObject.Save(7, &f.Pread) + stateSinkObject.Save(8, &f.Pwrite) + stateSinkObject.Save(9, &f.Directory) + stateSinkObject.Save(10, &f.Async) + stateSinkObject.Save(11, &f.LargeFile) + stateSinkObject.Save(12, &f.NonSeekable) + stateSinkObject.Save(13, &f.Truncate) +} + +func (f *FileFlags) afterLoad() {} + +// +checklocksignore +func (f *FileFlags) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.Direct) + stateSourceObject.Load(1, &f.NonBlocking) + stateSourceObject.Load(2, &f.DSync) + stateSourceObject.Load(3, &f.Sync) + stateSourceObject.Load(4, &f.Append) + stateSourceObject.Load(5, &f.Read) + stateSourceObject.Load(6, &f.Write) + stateSourceObject.Load(7, &f.Pread) + stateSourceObject.Load(8, &f.Pwrite) + stateSourceObject.Load(9, &f.Directory) + stateSourceObject.Load(10, &f.Async) + stateSourceObject.Load(11, &f.LargeFile) + stateSourceObject.Load(12, &f.NonSeekable) + stateSourceObject.Load(13, &f.Truncate) +} + +func (i *Inode) StateTypeName() string { + return "pkg/sentry/fs.Inode" +} + +func (i *Inode) StateFields() []string { + return []string{ + "AtomicRefCount", + "InodeOperations", + "StableAttr", + "LockCtx", + "Watches", + "MountSource", + "overlay", + } +} + +func (i *Inode) beforeSave() {} + +// +checklocksignore +func (i *Inode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.AtomicRefCount) + stateSinkObject.Save(1, &i.InodeOperations) + stateSinkObject.Save(2, &i.StableAttr) + stateSinkObject.Save(3, &i.LockCtx) + stateSinkObject.Save(4, &i.Watches) + stateSinkObject.Save(5, &i.MountSource) + stateSinkObject.Save(6, &i.overlay) +} + +func (i *Inode) afterLoad() {} + +// +checklocksignore +func (i *Inode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.AtomicRefCount) + stateSourceObject.Load(1, &i.InodeOperations) + stateSourceObject.Load(2, &i.StableAttr) + stateSourceObject.Load(3, &i.LockCtx) + stateSourceObject.Load(4, &i.Watches) + stateSourceObject.Load(5, &i.MountSource) + stateSourceObject.Load(6, &i.overlay) +} + +func (l *LockCtx) StateTypeName() string { + return "pkg/sentry/fs.LockCtx" +} + +func (l *LockCtx) StateFields() []string { + return []string{ + "Posix", + "BSD", + } +} + +func (l *LockCtx) beforeSave() {} + +// +checklocksignore +func (l *LockCtx) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.Posix) + stateSinkObject.Save(1, &l.BSD) +} + +func (l *LockCtx) afterLoad() {} + +// +checklocksignore +func (l *LockCtx) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.Posix) + stateSourceObject.Load(1, &l.BSD) +} + +func (w *Watches) StateTypeName() string { + return "pkg/sentry/fs.Watches" +} + +func (w *Watches) StateFields() []string { + return []string{ + "ws", + "unlinked", + } +} + +func (w *Watches) beforeSave() {} + +// +checklocksignore +func (w *Watches) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.ws) + stateSinkObject.Save(1, &w.unlinked) +} + +func (w *Watches) afterLoad() {} + +// +checklocksignore +func (w *Watches) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.ws) + stateSourceObject.Load(1, &w.unlinked) +} + +func (i *Inotify) StateTypeName() string { + return "pkg/sentry/fs.Inotify" +} + +func (i *Inotify) StateFields() []string { + return []string{ + "id", + "events", + "scratch", + "nextWatch", + "watches", + } +} + +func (i *Inotify) beforeSave() {} + +// +checklocksignore +func (i *Inotify) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.id) + stateSinkObject.Save(1, &i.events) + stateSinkObject.Save(2, &i.scratch) + stateSinkObject.Save(3, &i.nextWatch) + stateSinkObject.Save(4, &i.watches) +} + +func (i *Inotify) afterLoad() {} + +// +checklocksignore +func (i *Inotify) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.id) + stateSourceObject.Load(1, &i.events) + stateSourceObject.Load(2, &i.scratch) + stateSourceObject.Load(3, &i.nextWatch) + stateSourceObject.Load(4, &i.watches) +} + +func (e *Event) StateTypeName() string { + return "pkg/sentry/fs.Event" +} + +func (e *Event) StateFields() []string { + return []string{ + "eventEntry", + "wd", + "mask", + "cookie", + "len", + "name", + } +} + +func (e *Event) beforeSave() {} + +// +checklocksignore +func (e *Event) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.eventEntry) + stateSinkObject.Save(1, &e.wd) + stateSinkObject.Save(2, &e.mask) + stateSinkObject.Save(3, &e.cookie) + stateSinkObject.Save(4, &e.len) + stateSinkObject.Save(5, &e.name) +} + +func (e *Event) afterLoad() {} + +// +checklocksignore +func (e *Event) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.eventEntry) + stateSourceObject.Load(1, &e.wd) + stateSourceObject.Load(2, &e.mask) + stateSourceObject.Load(3, &e.cookie) + stateSourceObject.Load(4, &e.len) + stateSourceObject.Load(5, &e.name) +} + +func (w *Watch) StateTypeName() string { + return "pkg/sentry/fs.Watch" +} + +func (w *Watch) StateFields() []string { + return []string{ + "owner", + "wd", + "target", + "unpinned", + "mask", + "pins", + } +} + +func (w *Watch) beforeSave() {} + +// +checklocksignore +func (w *Watch) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.owner) + stateSinkObject.Save(1, &w.wd) + stateSinkObject.Save(2, &w.target) + stateSinkObject.Save(3, &w.unpinned) + stateSinkObject.Save(4, &w.mask) + stateSinkObject.Save(5, &w.pins) +} + +func (w *Watch) afterLoad() {} + +// +checklocksignore +func (w *Watch) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.owner) + stateSourceObject.Load(1, &w.wd) + stateSourceObject.Load(2, &w.target) + stateSourceObject.Load(3, &w.unpinned) + stateSourceObject.Load(4, &w.mask) + stateSourceObject.Load(5, &w.pins) +} + +func (msrc *MountSource) StateTypeName() string { + return "pkg/sentry/fs.MountSource" +} + +func (msrc *MountSource) StateFields() []string { + return []string{ + "AtomicRefCount", + "MountSourceOperations", + "FilesystemType", + "Flags", + "fscache", + "direntRefs", + } +} + +func (msrc *MountSource) beforeSave() {} + +// +checklocksignore +func (msrc *MountSource) StateSave(stateSinkObject state.Sink) { + msrc.beforeSave() + stateSinkObject.Save(0, &msrc.AtomicRefCount) + stateSinkObject.Save(1, &msrc.MountSourceOperations) + stateSinkObject.Save(2, &msrc.FilesystemType) + stateSinkObject.Save(3, &msrc.Flags) + stateSinkObject.Save(4, &msrc.fscache) + stateSinkObject.Save(5, &msrc.direntRefs) +} + +func (msrc *MountSource) afterLoad() {} + +// +checklocksignore +func (msrc *MountSource) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &msrc.AtomicRefCount) + stateSourceObject.Load(1, &msrc.MountSourceOperations) + stateSourceObject.Load(2, &msrc.FilesystemType) + stateSourceObject.Load(3, &msrc.Flags) + stateSourceObject.Load(4, &msrc.fscache) + stateSourceObject.Load(5, &msrc.direntRefs) +} + +func (smo *SimpleMountSourceOperations) StateTypeName() string { + return "pkg/sentry/fs.SimpleMountSourceOperations" +} + +func (smo *SimpleMountSourceOperations) StateFields() []string { + return []string{ + "keep", + "revalidate", + "cacheReaddir", + } +} + +func (smo *SimpleMountSourceOperations) beforeSave() {} + +// +checklocksignore +func (smo *SimpleMountSourceOperations) StateSave(stateSinkObject state.Sink) { + smo.beforeSave() + stateSinkObject.Save(0, &smo.keep) + stateSinkObject.Save(1, &smo.revalidate) + stateSinkObject.Save(2, &smo.cacheReaddir) +} + +func (smo *SimpleMountSourceOperations) afterLoad() {} + +// +checklocksignore +func (smo *SimpleMountSourceOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &smo.keep) + stateSourceObject.Load(1, &smo.revalidate) + stateSourceObject.Load(2, &smo.cacheReaddir) +} + +func (o *overlayMountSourceOperations) StateTypeName() string { + return "pkg/sentry/fs.overlayMountSourceOperations" +} + +func (o *overlayMountSourceOperations) StateFields() []string { + return []string{ + "upper", + "lower", + } +} + +func (o *overlayMountSourceOperations) beforeSave() {} + +// +checklocksignore +func (o *overlayMountSourceOperations) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.upper) + stateSinkObject.Save(1, &o.lower) +} + +func (o *overlayMountSourceOperations) afterLoad() {} + +// +checklocksignore +func (o *overlayMountSourceOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.upper) + stateSourceObject.Load(1, &o.lower) +} + +func (ofs *overlayFilesystem) StateTypeName() string { + return "pkg/sentry/fs.overlayFilesystem" +} + +func (ofs *overlayFilesystem) StateFields() []string { + return []string{} +} + +func (ofs *overlayFilesystem) beforeSave() {} + +// +checklocksignore +func (ofs *overlayFilesystem) StateSave(stateSinkObject state.Sink) { + ofs.beforeSave() +} + +func (ofs *overlayFilesystem) afterLoad() {} + +// +checklocksignore +func (ofs *overlayFilesystem) StateLoad(stateSourceObject state.Source) { +} + +func (m *Mount) StateTypeName() string { + return "pkg/sentry/fs.Mount" +} + +func (m *Mount) StateFields() []string { + return []string{ + "ID", + "ParentID", + "root", + "previous", + } +} + +func (m *Mount) beforeSave() {} + +// +checklocksignore +func (m *Mount) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.ID) + stateSinkObject.Save(1, &m.ParentID) + stateSinkObject.Save(2, &m.root) + stateSinkObject.Save(3, &m.previous) +} + +func (m *Mount) afterLoad() {} + +// +checklocksignore +func (m *Mount) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.ID) + stateSourceObject.Load(1, &m.ParentID) + stateSourceObject.Load(2, &m.root) + stateSourceObject.Load(3, &m.previous) +} + +func (mns *MountNamespace) StateTypeName() string { + return "pkg/sentry/fs.MountNamespace" +} + +func (mns *MountNamespace) StateFields() []string { + return []string{ + "AtomicRefCount", + "userns", + "root", + "mounts", + "mountID", + } +} + +func (mns *MountNamespace) beforeSave() {} + +// +checklocksignore +func (mns *MountNamespace) StateSave(stateSinkObject state.Sink) { + mns.beforeSave() + stateSinkObject.Save(0, &mns.AtomicRefCount) + stateSinkObject.Save(1, &mns.userns) + stateSinkObject.Save(2, &mns.root) + stateSinkObject.Save(3, &mns.mounts) + stateSinkObject.Save(4, &mns.mountID) +} + +func (mns *MountNamespace) afterLoad() {} + +// +checklocksignore +func (mns *MountNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mns.AtomicRefCount) + stateSourceObject.Load(1, &mns.userns) + stateSourceObject.Load(2, &mns.root) + stateSourceObject.Load(3, &mns.mounts) + stateSourceObject.Load(4, &mns.mountID) +} + +func (o *overlayEntry) StateTypeName() string { + return "pkg/sentry/fs.overlayEntry" +} + +func (o *overlayEntry) StateFields() []string { + return []string{ + "lowerExists", + "lower", + "mappings", + "upper", + "dirCache", + } +} + +func (o *overlayEntry) beforeSave() {} + +// +checklocksignore +func (o *overlayEntry) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.lowerExists) + stateSinkObject.Save(1, &o.lower) + stateSinkObject.Save(2, &o.mappings) + stateSinkObject.Save(3, &o.upper) + stateSinkObject.Save(4, &o.dirCache) +} + +func (o *overlayEntry) afterLoad() {} + +// +checklocksignore +func (o *overlayEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.lowerExists) + stateSourceObject.Load(1, &o.lower) + stateSourceObject.Load(2, &o.mappings) + stateSourceObject.Load(3, &o.upper) + stateSourceObject.Load(4, &o.dirCache) +} + +func init() { + state.Register((*StableAttr)(nil)) + state.Register((*UnstableAttr)(nil)) + state.Register((*AttrMask)(nil)) + state.Register((*PermMask)(nil)) + state.Register((*FilePermissions)(nil)) + state.Register((*FileOwner)(nil)) + state.Register((*DentAttr)(nil)) + state.Register((*SortedDentryMap)(nil)) + state.Register((*Dirent)(nil)) + state.Register((*DirentCache)(nil)) + state.Register((*DirentCacheLimiter)(nil)) + state.Register((*direntList)(nil)) + state.Register((*direntEntry)(nil)) + state.Register((*eventList)(nil)) + state.Register((*eventEntry)(nil)) + state.Register((*File)(nil)) + state.Register((*overlayFileOperations)(nil)) + state.Register((*overlayMappingIdentity)(nil)) + state.Register((*MountSourceFlags)(nil)) + state.Register((*FileFlags)(nil)) + state.Register((*Inode)(nil)) + state.Register((*LockCtx)(nil)) + state.Register((*Watches)(nil)) + state.Register((*Inotify)(nil)) + state.Register((*Event)(nil)) + state.Register((*Watch)(nil)) + state.Register((*MountSource)(nil)) + state.Register((*SimpleMountSourceOperations)(nil)) + state.Register((*overlayMountSourceOperations)(nil)) + state.Register((*overlayFilesystem)(nil)) + state.Register((*Mount)(nil)) + state.Register((*MountNamespace)(nil)) + state.Register((*overlayEntry)(nil)) +} diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD deleted file mode 100644 index 6bf2d51cb..000000000 --- a/pkg/sentry/fs/fsutil/BUILD +++ /dev/null @@ -1,119 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "dirty_set_impl", - out = "dirty_set_impl.go", - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - }, - package = "fsutil", - prefix = "Dirty", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.MappableRange", - "Value": "DirtyInfo", - "Functions": "dirtySetFunctions", - }, -) - -go_template_instance( - name = "frame_ref_set_impl", - out = "frame_ref_set_impl.go", - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - }, - package = "fsutil", - prefix = "FrameRef", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.FileRange", - "Value": "uint64", - "Functions": "FrameRefSetFunctions", - }, -) - -go_template_instance( - name = "file_range_set_impl", - out = "file_range_set_impl.go", - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - }, - package = "fsutil", - prefix = "FileRange", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.MappableRange", - "Value": "uint64", - "Functions": "FileRangeSetFunctions", - }, -) - -go_library( - name = "fsutil", - srcs = [ - "dirty_set.go", - "dirty_set_impl.go", - "file.go", - "file_range_set.go", - "file_range_set_impl.go", - "frame_ref_set.go", - "frame_ref_set_impl.go", - "fsutil.go", - "host_file_mapper.go", - "host_file_mapper_state.go", - "host_file_mapper_unsafe.go", - "host_mappable.go", - "inode.go", - "inode_cached.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/state", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "fsutil_test", - size = "small", - srcs = [ - "dirty_set_test.go", - "inode_cached_test.go", - ], - library = ":fsutil", - deps = [ - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/safemem", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/fsutil/README.md b/pkg/sentry/fs/fsutil/README.md deleted file mode 100644 index 8be367334..000000000 --- a/pkg/sentry/fs/fsutil/README.md +++ /dev/null @@ -1,207 +0,0 @@ -This package provides utilities for implementing virtual filesystem objects. - -[TOC] - -## Page cache - -`CachingInodeOperations` implements a page cache for files that cannot use the -host page cache. Normally these are files that store their data in a remote -filesystem. This also applies to files that are accessed on a platform that does -not support directly memory mapping host file descriptors (e.g. the ptrace -platform). - -An `CachingInodeOperations` buffers regions of a single file into memory. It is -owned by an `fs.Inode`, the in-memory representation of a file (all open file -descriptors are backed by an `fs.Inode`). The `fs.Inode` provides operations for -reading memory into an `CachingInodeOperations`, to represent the contents of -the file in-memory, and for writing memory out, to relieve memory pressure on -the kernel and to synchronize in-memory changes to filesystems. - -An `CachingInodeOperations` enables readable and/or writable memory access to -file content. Files can be mapped shared or private, see mmap(2). When a file is -mapped shared, changes to the file via write(2) and truncate(2) are reflected in -the shared memory region. Conversely, when the shared memory region is modified, -changes to the file are visible via read(2). Multiple shared mappings of the -same file are coherent with each other. This is consistent with Linux. - -When a file is mapped private, updates to the mapped memory are not visible to -other memory mappings. Updates to the mapped memory are also not reflected in -the file content as seen by read(2). If the file is changed after a private -mapping is created, for instance by write(2), the change to the file may or may -not be reflected in the private mapping. This is consistent with Linux. - -An `CachingInodeOperations` keeps track of ranges of memory that were modified -(or "dirtied"). When the file is explicitly synced via fsync(2), only the dirty -ranges are written out to the filesystem. Any error returned indicates a failure -to write all dirty memory of an `CachingInodeOperations` to the filesystem. In -this case the filesystem may be in an inconsistent state. The same operation can -be performed on the shared memory itself using msync(2). If neither fsync(2) nor -msync(2) is performed, then the dirty memory is written out in accordance with -the `CachingInodeOperations` eviction strategy (see below) and there is no -guarantee that memory will be written out successfully in full. - -### Memory allocation and eviction - -An `CachingInodeOperations` implements the following allocation and eviction -strategy: - -- Memory is allocated and brought up to date with the contents of a file when - a region of mapped memory is accessed (or "faulted on"). - -- Dirty memory is written out to filesystems when an fsync(2) or msync(2) - operation is performed on a memory mapped file, for all memory mapped files - when saved, and/or when there are no longer any memory mappings of a range - of a file, see munmap(2). As the latter implies, in the absence of a panic - or SIGKILL, dirty memory is written out for all memory mapped files when an - application exits. - -- Memory is freed when there are no longer any memory mappings of a range of a - file (e.g. when an application exits). This behavior is consistent with - Linux for shared memory that has been locked via mlock(2). - -Notably, memory is not allocated for read(2) or write(2) operations. This means -that reads and writes to the file are only accelerated by an -`CachingInodeOperations` if the file being read or written has been memory -mapped *and* if the shared memory has been accessed at the region being read or -written. This diverges from Linux which buffers memory into a page cache on -read(2) proactively (i.e. readahead) and delays writing it out to filesystems on -write(2) (i.e. writeback). The absence of these optimizations is not visible to -applications beyond less than optimal performance when repeatedly reading and/or -writing to same region of a file. See [Future Work](#future-work) for plans to -implement these optimizations. - -Additionally, memory held by `CachingInodeOperationss` is currently unbounded in -size. An `CachingInodeOperations` does not write out dirty memory and free it -under system memory pressure. This can cause pathological memory usage. - -When memory is written back, an `CachingInodeOperations` may write regions of -shared memory that were never modified. This is due to the strategy of -minimizing page faults (see below) and handling only a subset of memory write -faults. In the absence of an application or sentry crash, it is guaranteed that -if a region of shared memory was written to, it is written back to a filesystem. - -### Life of a shared memory mapping - -A file is memory mapped via mmap(2). For example, if `A` is an address, an -application may execute: - -``` -mmap(A, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); -``` - -This creates a shared mapping of fd that reflects 4k of the contents of fd -starting at offset 0, accessible at address `A`. This in turn creates a virtual -memory area region ("vma") which indicates that [`A`, `A`+0x1000) is now a valid -address range for this application to access. - -At this point, memory has not been allocated in the file's -`CachingInodeOperations`. It is also the case that the address range [`A`, -`A`+0x1000) has not been mapped on the host on behalf of the application. If the -application then tries to modify 8 bytes of the shared memory: - -``` -char buffer[] = "aaaaaaaa"; -memcpy(A, buffer, 8); -``` - -The host then sends a `SIGSEGV` to the sentry because the address range [`A`, -`A`+8) is not mapped on the host. The `SIGSEGV` indicates that the memory was -accessed writable. The sentry looks up the vma associated with [`A`, `A`+8), -finds the file that was mapped and its `CachingInodeOperations`. It then calls -`CachingInodeOperations.Translate` which allocates memory to back [`A`, `A`+8). -It may choose to allocate more memory (i.e. do "readahead") to minimize -subsequent faults. - -Memory that is allocated comes from a host tmpfs file (see -`pgalloc.MemoryFile`). The host tmpfs file memory is brought up to date with the -contents of the mapped file on its filesystem. The region of the host tmpfs file -that reflects the mapped file is then mapped into the host address space of the -application so that subsequent memory accesses do not repeatedly generate a -`SIGSEGV`. - -The range that was allocated, including any extra memory allocation to minimize -faults, is marked dirty due to the write fault. This overcounts dirty memory if -the extra memory allocated is never modified. - -To make the scenario more interesting, imagine that this application spawns -another process and maps the same file in the exact same way: - -``` -mmap(A, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); -``` - -Imagine that this process then tries to modify the file again but with only 4 -bytes: - -``` -char buffer[] = "bbbb"; -memcpy(A, buffer, 4); -``` - -Since the first process has already mapped and accessed the same region of the -file writable, `CachingInodeOperations.Translate` is called but returns the -memory that has already been allocated rather than allocating new memory. The -address range [`A`, `A`+0x1000) reflects the same cached view of the file as the -first process sees. For example, reading 8 bytes from the file from either -process via read(2) starting at offset 0 returns a consistent "bbbbaaaa". - -When this process no longer needs the shared memory, it may do: - -``` -munmap(A, 0x1000); -``` - -At this point, the modified memory cached by the `CachingInodeOperations` is not -written back to the file because it is still in use by the first process that -mapped it. When the first process also does: - -``` -munmap(A, 0x1000); -``` - -Then the last memory mapping of the file at the range [0, 0x1000) is gone. The -file's `CachingInodeOperations` then starts writing back memory marked dirty to -the file on its filesystem. Once writing completes, regardless of whether it was -successful, the `CachingInodeOperations` frees the memory cached at the range -[0, 0x1000). - -Subsequent read(2) or write(2) operations on the file go directly to the -filesystem since there no longer exists memory for it in its -`CachingInodeOperations`. - -## Future Work - -### Page cache - -The sentry does not yet implement the readahead and writeback optimizations for -read(2) and write(2) respectively. To do so, on read(2) and/or write(2) the -sentry must ensure that memory is allocated in a page cache to read or write -into. However, the sentry cannot boundlessly allocate memory. If it did, the -host would eventually OOM-kill the sentry+application process. This means that -the sentry must implement a page cache memory allocation strategy that is -bounded by a global user or container imposed limit. When this limit is -approached, the sentry must decide from which page cache memory should be freed -so that it can allocate more memory. If it makes a poor decision, the sentry may -end up freeing and re-allocating memory to back regions of files that are -frequently used, nullifying the optimization (and in some cases causing worse -performance due to the overhead of memory allocation and general management). -This is a form of "cache thrashing". - -In Linux, much research has been done to select and implement a lightweight but -optimal page cache eviction algorithm. Linux makes use of hardware page bits to -keep track of whether memory has been accessed. The sentry does not have direct -access to hardware. Implementing a similarly lightweight and optimal page cache -eviction algorithm will need to either introduce a kernel interface to obtain -these page bits or find a suitable alternative proxy for access events. - -In Linux, readahead happens by default but is not always ideal. For instance, -for files that are not read sequentially, it would be more ideal to simply read -from only those regions of the file rather than to optimistically cache some -number of bytes ahead of the read (up to 2MB in Linux) if the bytes cached won't -be accessed. Linux implements the fadvise64(2) system call for applications to -specify that a range of a file will not be accessed sequentially. The advice bit -FADV_RANDOM turns off the readahead optimization for the given range in the -given file. However fadvise64 is rarely used by applications so Linux implements -a readahead backoff strategy if reads are not sequential. To ensure that -application performance is not degraded, the sentry must implement a similar -backoff strategy. diff --git a/pkg/sentry/fs/fsutil/dirty_set_impl.go b/pkg/sentry/fs/fsutil/dirty_set_impl.go new file mode 100644 index 000000000..2c6a10fc4 --- /dev/null +++ b/pkg/sentry/fs/fsutil/dirty_set_impl.go @@ -0,0 +1,1647 @@ +package fsutil + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const DirtytrackGaps = 0 + +var _ = uint8(DirtytrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type DirtydynamicGap [DirtytrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *DirtydynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *DirtydynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + DirtyminDegree = 3 + + DirtymaxDegree = 2 * DirtyminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type DirtySet struct { + root Dirtynode `state:".(*DirtySegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *DirtySet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *DirtySet) IsEmptyRange(r __generics_imported0.MappableRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *DirtySet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *DirtySet) SpanRange(r __generics_imported0.MappableRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *DirtySet) FirstSegment() DirtyIterator { + if s.root.nrSegments == 0 { + return DirtyIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *DirtySet) LastSegment() DirtyIterator { + if s.root.nrSegments == 0 { + return DirtyIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *DirtySet) FirstGap() DirtyGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return DirtyGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *DirtySet) LastGap() DirtyGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return DirtyGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *DirtySet) Find(key uint64) (DirtyIterator, DirtyGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return DirtyIterator{n, i}, DirtyGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return DirtyIterator{}, DirtyGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *DirtySet) FindSegment(key uint64) DirtyIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *DirtySet) LowerBoundSegment(min uint64) DirtyIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *DirtySet) UpperBoundSegment(max uint64) DirtyIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *DirtySet) FindGap(key uint64) DirtyGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *DirtySet) LowerBoundGap(min uint64) DirtyGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *DirtySet) UpperBoundGap(max uint64) DirtyGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *DirtySet) Add(r __generics_imported0.MappableRange, val DirtyInfo) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *DirtySet) AddWithoutMerging(r __generics_imported0.MappableRange, val DirtyInfo) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *DirtySet) Insert(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (dirtySetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := DirtytrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (dirtySetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (dirtySetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := DirtytrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *DirtySet) InsertWithoutMerging(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *DirtySet) InsertWithoutMergingUnchecked(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := DirtytrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return DirtyIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *DirtySet) Remove(seg DirtyIterator) DirtyGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if DirtytrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + dirtySetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if DirtytrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(DirtyGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *DirtySet) RemoveAll() { + s.root = Dirtynode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *DirtySet) RemoveRange(r __generics_imported0.MappableRange) DirtyGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *DirtySet) Merge(first, second DirtyIterator) DirtyIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *DirtySet) MergeUnchecked(first, second DirtyIterator) DirtyIterator { + if first.End() == second.Start() { + if mval, ok := (dirtySetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return DirtyIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *DirtySet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *DirtySet) MergeRange(r __generics_imported0.MappableRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *DirtySet) MergeAdjacent(r __generics_imported0.MappableRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *DirtySet) Split(seg DirtyIterator, split uint64) (DirtyIterator, DirtyIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *DirtySet) SplitUnchecked(seg DirtyIterator, split uint64) (DirtyIterator, DirtyIterator) { + val1, val2 := (dirtySetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.MappableRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *DirtySet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *DirtySet) Isolate(seg DirtyIterator, r __generics_imported0.MappableRange) DirtyIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *DirtySet) ApplyContiguous(r __generics_imported0.MappableRange, fn func(seg DirtyIterator)) DirtyGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return DirtyGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return DirtyGapIterator{} + } + } +} + +// +stateify savable +type Dirtynode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *Dirtynode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap DirtydynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [DirtymaxDegree - 1]__generics_imported0.MappableRange + values [DirtymaxDegree - 1]DirtyInfo + children [DirtymaxDegree]*Dirtynode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Dirtynode) firstSegment() DirtyIterator { + for n.hasChildren { + n = n.children[0] + } + return DirtyIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Dirtynode) lastSegment() DirtyIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return DirtyIterator{n, n.nrSegments - 1} +} + +func (n *Dirtynode) prevSibling() *Dirtynode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *Dirtynode) nextSibling() *Dirtynode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *Dirtynode) rebalanceBeforeInsert(gap DirtyGapIterator) DirtyGapIterator { + if n.nrSegments < DirtymaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &Dirtynode{ + nrSegments: DirtyminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &Dirtynode{ + nrSegments: DirtyminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:DirtyminDegree-1], n.keys[:DirtyminDegree-1]) + copy(left.values[:DirtyminDegree-1], n.values[:DirtyminDegree-1]) + copy(right.keys[:DirtyminDegree-1], n.keys[DirtyminDegree:]) + copy(right.values[:DirtyminDegree-1], n.values[DirtyminDegree:]) + n.keys[0], n.values[0] = n.keys[DirtyminDegree-1], n.values[DirtyminDegree-1] + DirtyzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:DirtyminDegree], n.children[:DirtyminDegree]) + copy(right.children[:DirtyminDegree], n.children[DirtyminDegree:]) + DirtyzeroNodeSlice(n.children[2:]) + for i := 0; i < DirtyminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if DirtytrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < DirtyminDegree { + return DirtyGapIterator{left, gap.index} + } + return DirtyGapIterator{right, gap.index - DirtyminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[DirtyminDegree-1], n.values[DirtyminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &Dirtynode{ + nrSegments: DirtyminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:DirtyminDegree-1], n.keys[DirtyminDegree:]) + copy(sibling.values[:DirtyminDegree-1], n.values[DirtyminDegree:]) + DirtyzeroValueSlice(n.values[DirtyminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:DirtyminDegree], n.children[DirtyminDegree:]) + DirtyzeroNodeSlice(n.children[DirtyminDegree:]) + for i := 0; i < DirtyminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = DirtyminDegree - 1 + + if DirtytrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < DirtyminDegree { + return gap + } + return DirtyGapIterator{sibling, gap.index - DirtyminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *Dirtynode) rebalanceAfterRemove(gap DirtyGapIterator) DirtyGapIterator { + for { + if n.nrSegments >= DirtyminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= DirtyminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + dirtySetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if DirtytrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return DirtyGapIterator{n, 0} + } + if gap.node == n { + return DirtyGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= DirtyminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + dirtySetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if DirtytrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return DirtyGapIterator{n, n.nrSegments} + } + return DirtyGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return DirtyGapIterator{p, gap.index} + } + if gap.node == right { + return DirtyGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *Dirtynode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = DirtyGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + dirtySetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if DirtytrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *Dirtynode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *Dirtynode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *Dirtynode) calculateMaxGapLeaf() uint64 { + max := DirtyGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (DirtyGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *Dirtynode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *Dirtynode) searchFirstLargeEnoughGap(minSize uint64) DirtyGapIterator { + if n.maxGap.Get() < minSize { + return DirtyGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := DirtyGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *Dirtynode) searchLastLargeEnoughGap(minSize uint64) DirtyGapIterator { + if n.maxGap.Get() < minSize { + return DirtyGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := DirtyGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type DirtyIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *Dirtynode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg DirtyIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg DirtyIterator) Range() __generics_imported0.MappableRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg DirtyIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg DirtyIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg DirtyIterator) SetRangeUnchecked(r __generics_imported0.MappableRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg DirtyIterator) SetRange(r __generics_imported0.MappableRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg DirtyIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg DirtyIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg DirtyIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg DirtyIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg DirtyIterator) Value() DirtyInfo { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg DirtyIterator) ValuePtr() *DirtyInfo { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg DirtyIterator) SetValue(val DirtyInfo) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg DirtyIterator) PrevSegment() DirtyIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return DirtyIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return DirtyIterator{} + } + return DirtysegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg DirtyIterator) NextSegment() DirtyIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return DirtyIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return DirtyIterator{} + } + return DirtysegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg DirtyIterator) PrevGap() DirtyGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return DirtyGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg DirtyIterator) NextGap() DirtyGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return DirtyGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg DirtyIterator) PrevNonEmpty() (DirtyIterator, DirtyGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return DirtyIterator{}, gap + } + return gap.PrevSegment(), DirtyGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg DirtyIterator) NextNonEmpty() (DirtyIterator, DirtyGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return DirtyIterator{}, gap + } + return gap.NextSegment(), DirtyGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type DirtyGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *Dirtynode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap DirtyGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap DirtyGapIterator) Range() __generics_imported0.MappableRange { + return __generics_imported0.MappableRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap DirtyGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return dirtySetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap DirtyGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return dirtySetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap DirtyGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap DirtyGapIterator) PrevSegment() DirtyIterator { + return DirtysegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap DirtyGapIterator) NextSegment() DirtyIterator { + return DirtysegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap DirtyGapIterator) PrevGap() DirtyGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return DirtyGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap DirtyGapIterator) NextGap() DirtyGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return DirtyGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap DirtyGapIterator) NextLargeEnoughGap(minSize uint64) DirtyGapIterator { + if DirtytrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap DirtyGapIterator) nextLargeEnoughGapHelper(minSize uint64) DirtyGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return DirtyGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap DirtyGapIterator) PrevLargeEnoughGap(minSize uint64) DirtyGapIterator { + if DirtytrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap DirtyGapIterator) prevLargeEnoughGapHelper(minSize uint64) DirtyGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return DirtyGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func DirtysegmentBeforePosition(n *Dirtynode, i int) DirtyIterator { + for i == 0 { + if n.parent == nil { + return DirtyIterator{} + } + n, i = n.parent, n.parentIndex + } + return DirtyIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func DirtysegmentAfterPosition(n *Dirtynode, i int) DirtyIterator { + for i == n.nrSegments { + if n.parent == nil { + return DirtyIterator{} + } + n, i = n.parent, n.parentIndex + } + return DirtyIterator{n, i} +} + +func DirtyzeroValueSlice(slice []DirtyInfo) { + + for i := range slice { + dirtySetFunctions{}.ClearValue(&slice[i]) + } +} + +func DirtyzeroNodeSlice(slice []*Dirtynode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *DirtySet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *Dirtynode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *Dirtynode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if DirtytrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type DirtySegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []DirtyInfo +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *DirtySet) ExportSortedSlices() *DirtySegmentDataSlices { + var sds DirtySegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *DirtySet) ImportSortedSlices(sds *DirtySegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.MappableRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *DirtySet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.MappableRange, DirtyInfo) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *DirtySet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *DirtySet) saveRoot() *DirtySegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *DirtySet) loadRoot(sds *DirtySegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/fsutil/dirty_set_test.go b/pkg/sentry/fs/fsutil/dirty_set_test.go deleted file mode 100644 index 48448c97c..000000000 --- a/pkg/sentry/fs/fsutil/dirty_set_test.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fsutil - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/sentry/memmap" -) - -func TestDirtySet(t *testing.T) { - var set DirtySet - set.MarkDirty(memmap.MappableRange{0, 2 * hostarch.PageSize}) - set.KeepDirty(memmap.MappableRange{hostarch.PageSize, 2 * hostarch.PageSize}) - set.MarkClean(memmap.MappableRange{0, 2 * hostarch.PageSize}) - want := &DirtySegmentDataSlices{ - Start: []uint64{hostarch.PageSize}, - End: []uint64{2 * hostarch.PageSize}, - Values: []DirtyInfo{{Keep: true}}, - } - if got := set.ExportSortedSlices(); !reflect.DeepEqual(got, want) { - t.Errorf("set:\n\tgot %v,\n\twant %v", got, want) - } -} diff --git a/pkg/sentry/fs/fsutil/file_range_set_impl.go b/pkg/sentry/fs/fsutil/file_range_set_impl.go new file mode 100644 index 000000000..7568fb790 --- /dev/null +++ b/pkg/sentry/fs/fsutil/file_range_set_impl.go @@ -0,0 +1,1647 @@ +package fsutil + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const FileRangetrackGaps = 0 + +var _ = uint8(FileRangetrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type FileRangedynamicGap [FileRangetrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *FileRangedynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *FileRangedynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + FileRangeminDegree = 3 + + FileRangemaxDegree = 2 * FileRangeminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type FileRangeSet struct { + root FileRangenode `state:".(*FileRangeSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *FileRangeSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *FileRangeSet) IsEmptyRange(r __generics_imported0.MappableRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *FileRangeSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *FileRangeSet) SpanRange(r __generics_imported0.MappableRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *FileRangeSet) FirstSegment() FileRangeIterator { + if s.root.nrSegments == 0 { + return FileRangeIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *FileRangeSet) LastSegment() FileRangeIterator { + if s.root.nrSegments == 0 { + return FileRangeIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *FileRangeSet) FirstGap() FileRangeGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return FileRangeGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *FileRangeSet) LastGap() FileRangeGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return FileRangeGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *FileRangeSet) Find(key uint64) (FileRangeIterator, FileRangeGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return FileRangeIterator{n, i}, FileRangeGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return FileRangeIterator{}, FileRangeGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *FileRangeSet) FindSegment(key uint64) FileRangeIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *FileRangeSet) LowerBoundSegment(min uint64) FileRangeIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *FileRangeSet) UpperBoundSegment(max uint64) FileRangeIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *FileRangeSet) FindGap(key uint64) FileRangeGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *FileRangeSet) LowerBoundGap(min uint64) FileRangeGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *FileRangeSet) UpperBoundGap(max uint64) FileRangeGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *FileRangeSet) Add(r __generics_imported0.MappableRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *FileRangeSet) AddWithoutMerging(r __generics_imported0.MappableRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *FileRangeSet) Insert(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (FileRangeSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := FileRangetrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (FileRangeSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (FileRangeSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := FileRangetrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *FileRangeSet) InsertWithoutMerging(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *FileRangeSet) InsertWithoutMergingUnchecked(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := FileRangetrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return FileRangeIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *FileRangeSet) Remove(seg FileRangeIterator) FileRangeGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if FileRangetrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + FileRangeSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if FileRangetrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(FileRangeGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *FileRangeSet) RemoveAll() { + s.root = FileRangenode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *FileRangeSet) RemoveRange(r __generics_imported0.MappableRange) FileRangeGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *FileRangeSet) Merge(first, second FileRangeIterator) FileRangeIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *FileRangeSet) MergeUnchecked(first, second FileRangeIterator) FileRangeIterator { + if first.End() == second.Start() { + if mval, ok := (FileRangeSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return FileRangeIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *FileRangeSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *FileRangeSet) MergeRange(r __generics_imported0.MappableRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *FileRangeSet) MergeAdjacent(r __generics_imported0.MappableRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *FileRangeSet) Split(seg FileRangeIterator, split uint64) (FileRangeIterator, FileRangeIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *FileRangeSet) SplitUnchecked(seg FileRangeIterator, split uint64) (FileRangeIterator, FileRangeIterator) { + val1, val2 := (FileRangeSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.MappableRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *FileRangeSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *FileRangeSet) Isolate(seg FileRangeIterator, r __generics_imported0.MappableRange) FileRangeIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *FileRangeSet) ApplyContiguous(r __generics_imported0.MappableRange, fn func(seg FileRangeIterator)) FileRangeGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return FileRangeGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return FileRangeGapIterator{} + } + } +} + +// +stateify savable +type FileRangenode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *FileRangenode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap FileRangedynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [FileRangemaxDegree - 1]__generics_imported0.MappableRange + values [FileRangemaxDegree - 1]uint64 + children [FileRangemaxDegree]*FileRangenode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *FileRangenode) firstSegment() FileRangeIterator { + for n.hasChildren { + n = n.children[0] + } + return FileRangeIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *FileRangenode) lastSegment() FileRangeIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return FileRangeIterator{n, n.nrSegments - 1} +} + +func (n *FileRangenode) prevSibling() *FileRangenode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *FileRangenode) nextSibling() *FileRangenode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *FileRangenode) rebalanceBeforeInsert(gap FileRangeGapIterator) FileRangeGapIterator { + if n.nrSegments < FileRangemaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &FileRangenode{ + nrSegments: FileRangeminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &FileRangenode{ + nrSegments: FileRangeminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:FileRangeminDegree-1], n.keys[:FileRangeminDegree-1]) + copy(left.values[:FileRangeminDegree-1], n.values[:FileRangeminDegree-1]) + copy(right.keys[:FileRangeminDegree-1], n.keys[FileRangeminDegree:]) + copy(right.values[:FileRangeminDegree-1], n.values[FileRangeminDegree:]) + n.keys[0], n.values[0] = n.keys[FileRangeminDegree-1], n.values[FileRangeminDegree-1] + FileRangezeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:FileRangeminDegree], n.children[:FileRangeminDegree]) + copy(right.children[:FileRangeminDegree], n.children[FileRangeminDegree:]) + FileRangezeroNodeSlice(n.children[2:]) + for i := 0; i < FileRangeminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if FileRangetrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < FileRangeminDegree { + return FileRangeGapIterator{left, gap.index} + } + return FileRangeGapIterator{right, gap.index - FileRangeminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[FileRangeminDegree-1], n.values[FileRangeminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &FileRangenode{ + nrSegments: FileRangeminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:FileRangeminDegree-1], n.keys[FileRangeminDegree:]) + copy(sibling.values[:FileRangeminDegree-1], n.values[FileRangeminDegree:]) + FileRangezeroValueSlice(n.values[FileRangeminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:FileRangeminDegree], n.children[FileRangeminDegree:]) + FileRangezeroNodeSlice(n.children[FileRangeminDegree:]) + for i := 0; i < FileRangeminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = FileRangeminDegree - 1 + + if FileRangetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < FileRangeminDegree { + return gap + } + return FileRangeGapIterator{sibling, gap.index - FileRangeminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *FileRangenode) rebalanceAfterRemove(gap FileRangeGapIterator) FileRangeGapIterator { + for { + if n.nrSegments >= FileRangeminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= FileRangeminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + FileRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if FileRangetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return FileRangeGapIterator{n, 0} + } + if gap.node == n { + return FileRangeGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= FileRangeminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + FileRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if FileRangetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return FileRangeGapIterator{n, n.nrSegments} + } + return FileRangeGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return FileRangeGapIterator{p, gap.index} + } + if gap.node == right { + return FileRangeGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *FileRangenode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = FileRangeGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + FileRangeSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if FileRangetrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *FileRangenode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *FileRangenode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *FileRangenode) calculateMaxGapLeaf() uint64 { + max := FileRangeGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (FileRangeGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *FileRangenode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *FileRangenode) searchFirstLargeEnoughGap(minSize uint64) FileRangeGapIterator { + if n.maxGap.Get() < minSize { + return FileRangeGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := FileRangeGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *FileRangenode) searchLastLargeEnoughGap(minSize uint64) FileRangeGapIterator { + if n.maxGap.Get() < minSize { + return FileRangeGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := FileRangeGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type FileRangeIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *FileRangenode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg FileRangeIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg FileRangeIterator) Range() __generics_imported0.MappableRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg FileRangeIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg FileRangeIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg FileRangeIterator) SetRangeUnchecked(r __generics_imported0.MappableRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg FileRangeIterator) SetRange(r __generics_imported0.MappableRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg FileRangeIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg FileRangeIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg FileRangeIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg FileRangeIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg FileRangeIterator) Value() uint64 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg FileRangeIterator) ValuePtr() *uint64 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg FileRangeIterator) SetValue(val uint64) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg FileRangeIterator) PrevSegment() FileRangeIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return FileRangeIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return FileRangeIterator{} + } + return FileRangesegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg FileRangeIterator) NextSegment() FileRangeIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return FileRangeIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return FileRangeIterator{} + } + return FileRangesegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg FileRangeIterator) PrevGap() FileRangeGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return FileRangeGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg FileRangeIterator) NextGap() FileRangeGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return FileRangeGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg FileRangeIterator) PrevNonEmpty() (FileRangeIterator, FileRangeGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return FileRangeIterator{}, gap + } + return gap.PrevSegment(), FileRangeGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg FileRangeIterator) NextNonEmpty() (FileRangeIterator, FileRangeGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return FileRangeIterator{}, gap + } + return gap.NextSegment(), FileRangeGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type FileRangeGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *FileRangenode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap FileRangeGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap FileRangeGapIterator) Range() __generics_imported0.MappableRange { + return __generics_imported0.MappableRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap FileRangeGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return FileRangeSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap FileRangeGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return FileRangeSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap FileRangeGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap FileRangeGapIterator) PrevSegment() FileRangeIterator { + return FileRangesegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap FileRangeGapIterator) NextSegment() FileRangeIterator { + return FileRangesegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap FileRangeGapIterator) PrevGap() FileRangeGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return FileRangeGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap FileRangeGapIterator) NextGap() FileRangeGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return FileRangeGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap FileRangeGapIterator) NextLargeEnoughGap(minSize uint64) FileRangeGapIterator { + if FileRangetrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap FileRangeGapIterator) nextLargeEnoughGapHelper(minSize uint64) FileRangeGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return FileRangeGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap FileRangeGapIterator) PrevLargeEnoughGap(minSize uint64) FileRangeGapIterator { + if FileRangetrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap FileRangeGapIterator) prevLargeEnoughGapHelper(minSize uint64) FileRangeGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return FileRangeGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func FileRangesegmentBeforePosition(n *FileRangenode, i int) FileRangeIterator { + for i == 0 { + if n.parent == nil { + return FileRangeIterator{} + } + n, i = n.parent, n.parentIndex + } + return FileRangeIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func FileRangesegmentAfterPosition(n *FileRangenode, i int) FileRangeIterator { + for i == n.nrSegments { + if n.parent == nil { + return FileRangeIterator{} + } + n, i = n.parent, n.parentIndex + } + return FileRangeIterator{n, i} +} + +func FileRangezeroValueSlice(slice []uint64) { + + for i := range slice { + FileRangeSetFunctions{}.ClearValue(&slice[i]) + } +} + +func FileRangezeroNodeSlice(slice []*FileRangenode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *FileRangeSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *FileRangenode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *FileRangenode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if FileRangetrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type FileRangeSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []uint64 +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *FileRangeSet) ExportSortedSlices() *FileRangeSegmentDataSlices { + var sds FileRangeSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *FileRangeSet) ImportSortedSlices(sds *FileRangeSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.MappableRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *FileRangeSet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.MappableRange, uint64) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *FileRangeSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *FileRangeSet) saveRoot() *FileRangeSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *FileRangeSet) loadRoot(sds *FileRangeSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/fsutil/frame_ref_set_impl.go b/pkg/sentry/fs/fsutil/frame_ref_set_impl.go new file mode 100644 index 000000000..6657addf4 --- /dev/null +++ b/pkg/sentry/fs/fsutil/frame_ref_set_impl.go @@ -0,0 +1,1647 @@ +package fsutil + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const FrameReftrackGaps = 0 + +var _ = uint8(FrameReftrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type FrameRefdynamicGap [FrameReftrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *FrameRefdynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *FrameRefdynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + FrameRefminDegree = 3 + + FrameRefmaxDegree = 2 * FrameRefminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type FrameRefSet struct { + root FrameRefnode `state:".(*FrameRefSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *FrameRefSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *FrameRefSet) IsEmptyRange(r __generics_imported0.FileRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *FrameRefSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *FrameRefSet) SpanRange(r __generics_imported0.FileRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *FrameRefSet) FirstSegment() FrameRefIterator { + if s.root.nrSegments == 0 { + return FrameRefIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *FrameRefSet) LastSegment() FrameRefIterator { + if s.root.nrSegments == 0 { + return FrameRefIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *FrameRefSet) FirstGap() FrameRefGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return FrameRefGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *FrameRefSet) LastGap() FrameRefGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return FrameRefGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *FrameRefSet) Find(key uint64) (FrameRefIterator, FrameRefGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return FrameRefIterator{n, i}, FrameRefGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return FrameRefIterator{}, FrameRefGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *FrameRefSet) FindSegment(key uint64) FrameRefIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *FrameRefSet) LowerBoundSegment(min uint64) FrameRefIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *FrameRefSet) UpperBoundSegment(max uint64) FrameRefIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *FrameRefSet) FindGap(key uint64) FrameRefGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *FrameRefSet) LowerBoundGap(min uint64) FrameRefGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *FrameRefSet) UpperBoundGap(max uint64) FrameRefGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *FrameRefSet) Add(r __generics_imported0.FileRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *FrameRefSet) AddWithoutMerging(r __generics_imported0.FileRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *FrameRefSet) Insert(gap FrameRefGapIterator, r __generics_imported0.FileRange, val uint64) FrameRefIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (FrameRefSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := FrameReftrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (FrameRefSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (FrameRefSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := FrameReftrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *FrameRefSet) InsertWithoutMerging(gap FrameRefGapIterator, r __generics_imported0.FileRange, val uint64) FrameRefIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *FrameRefSet) InsertWithoutMergingUnchecked(gap FrameRefGapIterator, r __generics_imported0.FileRange, val uint64) FrameRefIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := FrameReftrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return FrameRefIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *FrameRefSet) Remove(seg FrameRefIterator) FrameRefGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if FrameReftrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + FrameRefSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if FrameReftrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(FrameRefGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *FrameRefSet) RemoveAll() { + s.root = FrameRefnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *FrameRefSet) RemoveRange(r __generics_imported0.FileRange) FrameRefGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *FrameRefSet) Merge(first, second FrameRefIterator) FrameRefIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *FrameRefSet) MergeUnchecked(first, second FrameRefIterator) FrameRefIterator { + if first.End() == second.Start() { + if mval, ok := (FrameRefSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return FrameRefIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *FrameRefSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *FrameRefSet) MergeRange(r __generics_imported0.FileRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *FrameRefSet) MergeAdjacent(r __generics_imported0.FileRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *FrameRefSet) Split(seg FrameRefIterator, split uint64) (FrameRefIterator, FrameRefIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *FrameRefSet) SplitUnchecked(seg FrameRefIterator, split uint64) (FrameRefIterator, FrameRefIterator) { + val1, val2 := (FrameRefSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.FileRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *FrameRefSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *FrameRefSet) Isolate(seg FrameRefIterator, r __generics_imported0.FileRange) FrameRefIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *FrameRefSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg FrameRefIterator)) FrameRefGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return FrameRefGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return FrameRefGapIterator{} + } + } +} + +// +stateify savable +type FrameRefnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *FrameRefnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap FrameRefdynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [FrameRefmaxDegree - 1]__generics_imported0.FileRange + values [FrameRefmaxDegree - 1]uint64 + children [FrameRefmaxDegree]*FrameRefnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *FrameRefnode) firstSegment() FrameRefIterator { + for n.hasChildren { + n = n.children[0] + } + return FrameRefIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *FrameRefnode) lastSegment() FrameRefIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return FrameRefIterator{n, n.nrSegments - 1} +} + +func (n *FrameRefnode) prevSibling() *FrameRefnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *FrameRefnode) nextSibling() *FrameRefnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *FrameRefnode) rebalanceBeforeInsert(gap FrameRefGapIterator) FrameRefGapIterator { + if n.nrSegments < FrameRefmaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &FrameRefnode{ + nrSegments: FrameRefminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &FrameRefnode{ + nrSegments: FrameRefminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:FrameRefminDegree-1], n.keys[:FrameRefminDegree-1]) + copy(left.values[:FrameRefminDegree-1], n.values[:FrameRefminDegree-1]) + copy(right.keys[:FrameRefminDegree-1], n.keys[FrameRefminDegree:]) + copy(right.values[:FrameRefminDegree-1], n.values[FrameRefminDegree:]) + n.keys[0], n.values[0] = n.keys[FrameRefminDegree-1], n.values[FrameRefminDegree-1] + FrameRefzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:FrameRefminDegree], n.children[:FrameRefminDegree]) + copy(right.children[:FrameRefminDegree], n.children[FrameRefminDegree:]) + FrameRefzeroNodeSlice(n.children[2:]) + for i := 0; i < FrameRefminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if FrameReftrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < FrameRefminDegree { + return FrameRefGapIterator{left, gap.index} + } + return FrameRefGapIterator{right, gap.index - FrameRefminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[FrameRefminDegree-1], n.values[FrameRefminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &FrameRefnode{ + nrSegments: FrameRefminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:FrameRefminDegree-1], n.keys[FrameRefminDegree:]) + copy(sibling.values[:FrameRefminDegree-1], n.values[FrameRefminDegree:]) + FrameRefzeroValueSlice(n.values[FrameRefminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:FrameRefminDegree], n.children[FrameRefminDegree:]) + FrameRefzeroNodeSlice(n.children[FrameRefminDegree:]) + for i := 0; i < FrameRefminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = FrameRefminDegree - 1 + + if FrameReftrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < FrameRefminDegree { + return gap + } + return FrameRefGapIterator{sibling, gap.index - FrameRefminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *FrameRefnode) rebalanceAfterRemove(gap FrameRefGapIterator) FrameRefGapIterator { + for { + if n.nrSegments >= FrameRefminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= FrameRefminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + FrameRefSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if FrameReftrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return FrameRefGapIterator{n, 0} + } + if gap.node == n { + return FrameRefGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= FrameRefminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + FrameRefSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if FrameReftrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return FrameRefGapIterator{n, n.nrSegments} + } + return FrameRefGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return FrameRefGapIterator{p, gap.index} + } + if gap.node == right { + return FrameRefGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *FrameRefnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = FrameRefGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + FrameRefSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if FrameReftrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *FrameRefnode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *FrameRefnode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *FrameRefnode) calculateMaxGapLeaf() uint64 { + max := FrameRefGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (FrameRefGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *FrameRefnode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *FrameRefnode) searchFirstLargeEnoughGap(minSize uint64) FrameRefGapIterator { + if n.maxGap.Get() < minSize { + return FrameRefGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := FrameRefGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *FrameRefnode) searchLastLargeEnoughGap(minSize uint64) FrameRefGapIterator { + if n.maxGap.Get() < minSize { + return FrameRefGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := FrameRefGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type FrameRefIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *FrameRefnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg FrameRefIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg FrameRefIterator) Range() __generics_imported0.FileRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg FrameRefIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg FrameRefIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg FrameRefIterator) SetRangeUnchecked(r __generics_imported0.FileRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg FrameRefIterator) SetRange(r __generics_imported0.FileRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg FrameRefIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg FrameRefIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg FrameRefIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg FrameRefIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg FrameRefIterator) Value() uint64 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg FrameRefIterator) ValuePtr() *uint64 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg FrameRefIterator) SetValue(val uint64) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg FrameRefIterator) PrevSegment() FrameRefIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return FrameRefIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return FrameRefIterator{} + } + return FrameRefsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg FrameRefIterator) NextSegment() FrameRefIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return FrameRefIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return FrameRefIterator{} + } + return FrameRefsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg FrameRefIterator) PrevGap() FrameRefGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return FrameRefGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg FrameRefIterator) NextGap() FrameRefGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return FrameRefGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg FrameRefIterator) PrevNonEmpty() (FrameRefIterator, FrameRefGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return FrameRefIterator{}, gap + } + return gap.PrevSegment(), FrameRefGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg FrameRefIterator) NextNonEmpty() (FrameRefIterator, FrameRefGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return FrameRefIterator{}, gap + } + return gap.NextSegment(), FrameRefGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type FrameRefGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *FrameRefnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap FrameRefGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap FrameRefGapIterator) Range() __generics_imported0.FileRange { + return __generics_imported0.FileRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap FrameRefGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return FrameRefSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap FrameRefGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return FrameRefSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap FrameRefGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap FrameRefGapIterator) PrevSegment() FrameRefIterator { + return FrameRefsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap FrameRefGapIterator) NextSegment() FrameRefIterator { + return FrameRefsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap FrameRefGapIterator) PrevGap() FrameRefGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return FrameRefGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap FrameRefGapIterator) NextGap() FrameRefGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return FrameRefGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap FrameRefGapIterator) NextLargeEnoughGap(minSize uint64) FrameRefGapIterator { + if FrameReftrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap FrameRefGapIterator) nextLargeEnoughGapHelper(minSize uint64) FrameRefGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return FrameRefGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap FrameRefGapIterator) PrevLargeEnoughGap(minSize uint64) FrameRefGapIterator { + if FrameReftrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap FrameRefGapIterator) prevLargeEnoughGapHelper(minSize uint64) FrameRefGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return FrameRefGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func FrameRefsegmentBeforePosition(n *FrameRefnode, i int) FrameRefIterator { + for i == 0 { + if n.parent == nil { + return FrameRefIterator{} + } + n, i = n.parent, n.parentIndex + } + return FrameRefIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func FrameRefsegmentAfterPosition(n *FrameRefnode, i int) FrameRefIterator { + for i == n.nrSegments { + if n.parent == nil { + return FrameRefIterator{} + } + n, i = n.parent, n.parentIndex + } + return FrameRefIterator{n, i} +} + +func FrameRefzeroValueSlice(slice []uint64) { + + for i := range slice { + FrameRefSetFunctions{}.ClearValue(&slice[i]) + } +} + +func FrameRefzeroNodeSlice(slice []*FrameRefnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *FrameRefSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *FrameRefnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *FrameRefnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if FrameReftrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type FrameRefSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []uint64 +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *FrameRefSet) ExportSortedSlices() *FrameRefSegmentDataSlices { + var sds FrameRefSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *FrameRefSet) ImportSortedSlices(sds *FrameRefSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.FileRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *FrameRefSet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.FileRange, uint64) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *FrameRefSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *FrameRefSet) saveRoot() *FrameRefSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *FrameRefSet) loadRoot(sds *FrameRefSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/fsutil/fsutil_impl_state_autogen.go b/pkg/sentry/fs/fsutil/fsutil_impl_state_autogen.go new file mode 100644 index 000000000..b7f95bde7 --- /dev/null +++ b/pkg/sentry/fs/fsutil/fsutil_impl_state_autogen.go @@ -0,0 +1,328 @@ +// automatically generated by stateify. + +package fsutil + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *DirtySet) StateTypeName() string { + return "pkg/sentry/fs/fsutil.DirtySet" +} + +func (s *DirtySet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *DirtySet) beforeSave() {} + +// +checklocksignore +func (s *DirtySet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *DirtySegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *DirtySet) afterLoad() {} + +// +checklocksignore +func (s *DirtySet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*DirtySegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*DirtySegmentDataSlices)) }) +} + +func (n *Dirtynode) StateTypeName() string { + return "pkg/sentry/fs/fsutil.Dirtynode" +} + +func (n *Dirtynode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *Dirtynode) beforeSave() {} + +// +checklocksignore +func (n *Dirtynode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *Dirtynode) afterLoad() {} + +// +checklocksignore +func (n *Dirtynode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (d *DirtySegmentDataSlices) StateTypeName() string { + return "pkg/sentry/fs/fsutil.DirtySegmentDataSlices" +} + +func (d *DirtySegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (d *DirtySegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (d *DirtySegmentDataSlices) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.Start) + stateSinkObject.Save(1, &d.End) + stateSinkObject.Save(2, &d.Values) +} + +func (d *DirtySegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (d *DirtySegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.Start) + stateSourceObject.Load(1, &d.End) + stateSourceObject.Load(2, &d.Values) +} + +func (s *FileRangeSet) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FileRangeSet" +} + +func (s *FileRangeSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *FileRangeSet) beforeSave() {} + +// +checklocksignore +func (s *FileRangeSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *FileRangeSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *FileRangeSet) afterLoad() {} + +// +checklocksignore +func (s *FileRangeSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*FileRangeSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*FileRangeSegmentDataSlices)) }) +} + +func (n *FileRangenode) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FileRangenode" +} + +func (n *FileRangenode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *FileRangenode) beforeSave() {} + +// +checklocksignore +func (n *FileRangenode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *FileRangenode) afterLoad() {} + +// +checklocksignore +func (n *FileRangenode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (f *FileRangeSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FileRangeSegmentDataSlices" +} + +func (f *FileRangeSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (f *FileRangeSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (f *FileRangeSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.Start) + stateSinkObject.Save(1, &f.End) + stateSinkObject.Save(2, &f.Values) +} + +func (f *FileRangeSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (f *FileRangeSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.Start) + stateSourceObject.Load(1, &f.End) + stateSourceObject.Load(2, &f.Values) +} + +func (s *FrameRefSet) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FrameRefSet" +} + +func (s *FrameRefSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *FrameRefSet) beforeSave() {} + +// +checklocksignore +func (s *FrameRefSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *FrameRefSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *FrameRefSet) afterLoad() {} + +// +checklocksignore +func (s *FrameRefSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*FrameRefSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*FrameRefSegmentDataSlices)) }) +} + +func (n *FrameRefnode) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FrameRefnode" +} + +func (n *FrameRefnode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *FrameRefnode) beforeSave() {} + +// +checklocksignore +func (n *FrameRefnode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *FrameRefnode) afterLoad() {} + +// +checklocksignore +func (n *FrameRefnode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (f *FrameRefSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FrameRefSegmentDataSlices" +} + +func (f *FrameRefSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (f *FrameRefSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (f *FrameRefSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.Start) + stateSinkObject.Save(1, &f.End) + stateSinkObject.Save(2, &f.Values) +} + +func (f *FrameRefSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (f *FrameRefSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.Start) + stateSourceObject.Load(1, &f.End) + stateSourceObject.Load(2, &f.Values) +} + +func init() { + state.Register((*DirtySet)(nil)) + state.Register((*Dirtynode)(nil)) + state.Register((*DirtySegmentDataSlices)(nil)) + state.Register((*FileRangeSet)(nil)) + state.Register((*FileRangenode)(nil)) + state.Register((*FileRangeSegmentDataSlices)(nil)) + state.Register((*FrameRefSet)(nil)) + state.Register((*FrameRefnode)(nil)) + state.Register((*FrameRefSegmentDataSlices)(nil)) +} diff --git a/pkg/sentry/fs/fsutil/fsutil_state_autogen.go b/pkg/sentry/fs/fsutil/fsutil_state_autogen.go new file mode 100644 index 000000000..748eb9272 --- /dev/null +++ b/pkg/sentry/fs/fsutil/fsutil_state_autogen.go @@ -0,0 +1,411 @@ +// automatically generated by stateify. + +package fsutil + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (d *DirtyInfo) StateTypeName() string { + return "pkg/sentry/fs/fsutil.DirtyInfo" +} + +func (d *DirtyInfo) StateFields() []string { + return []string{ + "Keep", + } +} + +func (d *DirtyInfo) beforeSave() {} + +// +checklocksignore +func (d *DirtyInfo) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.Keep) +} + +func (d *DirtyInfo) afterLoad() {} + +// +checklocksignore +func (d *DirtyInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.Keep) +} + +func (sdfo *StaticDirFileOperations) StateTypeName() string { + return "pkg/sentry/fs/fsutil.StaticDirFileOperations" +} + +func (sdfo *StaticDirFileOperations) StateFields() []string { + return []string{ + "dentryMap", + "dirCursor", + } +} + +func (sdfo *StaticDirFileOperations) beforeSave() {} + +// +checklocksignore +func (sdfo *StaticDirFileOperations) StateSave(stateSinkObject state.Sink) { + sdfo.beforeSave() + stateSinkObject.Save(0, &sdfo.dentryMap) + stateSinkObject.Save(1, &sdfo.dirCursor) +} + +func (sdfo *StaticDirFileOperations) afterLoad() {} + +// +checklocksignore +func (sdfo *StaticDirFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sdfo.dentryMap) + stateSourceObject.Load(1, &sdfo.dirCursor) +} + +func (n *NoReadWriteFile) StateTypeName() string { + return "pkg/sentry/fs/fsutil.NoReadWriteFile" +} + +func (n *NoReadWriteFile) StateFields() []string { + return []string{} +} + +func (n *NoReadWriteFile) beforeSave() {} + +// +checklocksignore +func (n *NoReadWriteFile) StateSave(stateSinkObject state.Sink) { + n.beforeSave() +} + +func (n *NoReadWriteFile) afterLoad() {} + +// +checklocksignore +func (n *NoReadWriteFile) StateLoad(stateSourceObject state.Source) { +} + +func (scr *FileStaticContentReader) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FileStaticContentReader" +} + +func (scr *FileStaticContentReader) StateFields() []string { + return []string{ + "content", + } +} + +func (scr *FileStaticContentReader) beforeSave() {} + +// +checklocksignore +func (scr *FileStaticContentReader) StateSave(stateSinkObject state.Sink) { + scr.beforeSave() + stateSinkObject.Save(0, &scr.content) +} + +func (scr *FileStaticContentReader) afterLoad() {} + +// +checklocksignore +func (scr *FileStaticContentReader) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &scr.content) +} + +func (f *HostFileMapper) StateTypeName() string { + return "pkg/sentry/fs/fsutil.HostFileMapper" +} + +func (f *HostFileMapper) StateFields() []string { + return []string{ + "refs", + } +} + +func (f *HostFileMapper) beforeSave() {} + +// +checklocksignore +func (f *HostFileMapper) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.refs) +} + +// +checklocksignore +func (f *HostFileMapper) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.refs) + stateSourceObject.AfterLoad(f.afterLoad) +} + +func (h *HostMappable) StateTypeName() string { + return "pkg/sentry/fs/fsutil.HostMappable" +} + +func (h *HostMappable) StateFields() []string { + return []string{ + "hostFileMapper", + "backingFile", + "mappings", + } +} + +func (h *HostMappable) beforeSave() {} + +// +checklocksignore +func (h *HostMappable) StateSave(stateSinkObject state.Sink) { + h.beforeSave() + stateSinkObject.Save(0, &h.hostFileMapper) + stateSinkObject.Save(1, &h.backingFile) + stateSinkObject.Save(2, &h.mappings) +} + +func (h *HostMappable) afterLoad() {} + +// +checklocksignore +func (h *HostMappable) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &h.hostFileMapper) + stateSourceObject.Load(1, &h.backingFile) + stateSourceObject.Load(2, &h.mappings) +} + +func (s *SimpleFileInode) StateTypeName() string { + return "pkg/sentry/fs/fsutil.SimpleFileInode" +} + +func (s *SimpleFileInode) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (s *SimpleFileInode) beforeSave() {} + +// +checklocksignore +func (s *SimpleFileInode) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeSimpleAttributes) +} + +func (s *SimpleFileInode) afterLoad() {} + +// +checklocksignore +func (s *SimpleFileInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeSimpleAttributes) +} + +func (n *NoReadWriteFileInode) StateTypeName() string { + return "pkg/sentry/fs/fsutil.NoReadWriteFileInode" +} + +func (n *NoReadWriteFileInode) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (n *NoReadWriteFileInode) beforeSave() {} + +// +checklocksignore +func (n *NoReadWriteFileInode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.InodeSimpleAttributes) +} + +func (n *NoReadWriteFileInode) afterLoad() {} + +// +checklocksignore +func (n *NoReadWriteFileInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.InodeSimpleAttributes) +} + +func (i *InodeSimpleAttributes) StateTypeName() string { + return "pkg/sentry/fs/fsutil.InodeSimpleAttributes" +} + +func (i *InodeSimpleAttributes) StateFields() []string { + return []string{ + "fsType", + "unstable", + } +} + +func (i *InodeSimpleAttributes) beforeSave() {} + +// +checklocksignore +func (i *InodeSimpleAttributes) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.fsType) + stateSinkObject.Save(1, &i.unstable) +} + +func (i *InodeSimpleAttributes) afterLoad() {} + +// +checklocksignore +func (i *InodeSimpleAttributes) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.fsType) + stateSourceObject.Load(1, &i.unstable) +} + +func (i *InodeSimpleExtendedAttributes) StateTypeName() string { + return "pkg/sentry/fs/fsutil.InodeSimpleExtendedAttributes" +} + +func (i *InodeSimpleExtendedAttributes) StateFields() []string { + return []string{ + "xattrs", + } +} + +func (i *InodeSimpleExtendedAttributes) beforeSave() {} + +// +checklocksignore +func (i *InodeSimpleExtendedAttributes) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.xattrs) +} + +func (i *InodeSimpleExtendedAttributes) afterLoad() {} + +// +checklocksignore +func (i *InodeSimpleExtendedAttributes) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.xattrs) +} + +func (s *staticFile) StateTypeName() string { + return "pkg/sentry/fs/fsutil.staticFile" +} + +func (s *staticFile) StateFields() []string { + return []string{ + "FileStaticContentReader", + } +} + +func (s *staticFile) beforeSave() {} + +// +checklocksignore +func (s *staticFile) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.FileStaticContentReader) +} + +func (s *staticFile) afterLoad() {} + +// +checklocksignore +func (s *staticFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.FileStaticContentReader) +} + +func (i *InodeStaticFileGetter) StateTypeName() string { + return "pkg/sentry/fs/fsutil.InodeStaticFileGetter" +} + +func (i *InodeStaticFileGetter) StateFields() []string { + return []string{ + "Contents", + } +} + +func (i *InodeStaticFileGetter) beforeSave() {} + +// +checklocksignore +func (i *InodeStaticFileGetter) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.Contents) +} + +func (i *InodeStaticFileGetter) afterLoad() {} + +// +checklocksignore +func (i *InodeStaticFileGetter) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.Contents) +} + +func (c *CachingInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/fsutil.CachingInodeOperations" +} + +func (c *CachingInodeOperations) StateFields() []string { + return []string{ + "backingFile", + "mfp", + "opts", + "attr", + "dirtyAttr", + "mappings", + "cache", + "dirty", + "hostFileMapper", + "refs", + } +} + +func (c *CachingInodeOperations) beforeSave() {} + +// +checklocksignore +func (c *CachingInodeOperations) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.backingFile) + stateSinkObject.Save(1, &c.mfp) + stateSinkObject.Save(2, &c.opts) + stateSinkObject.Save(3, &c.attr) + stateSinkObject.Save(4, &c.dirtyAttr) + stateSinkObject.Save(5, &c.mappings) + stateSinkObject.Save(6, &c.cache) + stateSinkObject.Save(7, &c.dirty) + stateSinkObject.Save(8, &c.hostFileMapper) + stateSinkObject.Save(9, &c.refs) +} + +func (c *CachingInodeOperations) afterLoad() {} + +// +checklocksignore +func (c *CachingInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.backingFile) + stateSourceObject.Load(1, &c.mfp) + stateSourceObject.Load(2, &c.opts) + stateSourceObject.Load(3, &c.attr) + stateSourceObject.Load(4, &c.dirtyAttr) + stateSourceObject.Load(5, &c.mappings) + stateSourceObject.Load(6, &c.cache) + stateSourceObject.Load(7, &c.dirty) + stateSourceObject.Load(8, &c.hostFileMapper) + stateSourceObject.Load(9, &c.refs) +} + +func (c *CachingInodeOperationsOptions) StateTypeName() string { + return "pkg/sentry/fs/fsutil.CachingInodeOperationsOptions" +} + +func (c *CachingInodeOperationsOptions) StateFields() []string { + return []string{ + "ForcePageCache", + "LimitHostFDTranslation", + } +} + +func (c *CachingInodeOperationsOptions) beforeSave() {} + +// +checklocksignore +func (c *CachingInodeOperationsOptions) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.ForcePageCache) + stateSinkObject.Save(1, &c.LimitHostFDTranslation) +} + +func (c *CachingInodeOperationsOptions) afterLoad() {} + +// +checklocksignore +func (c *CachingInodeOperationsOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.ForcePageCache) + stateSourceObject.Load(1, &c.LimitHostFDTranslation) +} + +func init() { + state.Register((*DirtyInfo)(nil)) + state.Register((*StaticDirFileOperations)(nil)) + state.Register((*NoReadWriteFile)(nil)) + state.Register((*FileStaticContentReader)(nil)) + state.Register((*HostFileMapper)(nil)) + state.Register((*HostMappable)(nil)) + state.Register((*SimpleFileInode)(nil)) + state.Register((*NoReadWriteFileInode)(nil)) + state.Register((*InodeSimpleAttributes)(nil)) + state.Register((*InodeSimpleExtendedAttributes)(nil)) + state.Register((*staticFile)(nil)) + state.Register((*InodeStaticFileGetter)(nil)) + state.Register((*CachingInodeOperations)(nil)) + state.Register((*CachingInodeOperationsOptions)(nil)) +} diff --git a/pkg/sentry/fs/fsutil/fsutil_unsafe_state_autogen.go b/pkg/sentry/fs/fsutil/fsutil_unsafe_state_autogen.go new file mode 100644 index 000000000..00b0994f6 --- /dev/null +++ b/pkg/sentry/fs/fsutil/fsutil_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package fsutil diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go deleted file mode 100644 index 25e76d9f2..000000000 --- a/pkg/sentry/fs/fsutil/inode_cached_test.go +++ /dev/null @@ -1,390 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fsutil - -import ( - "bytes" - "io" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/usermem" -) - -type noopBackingFile struct{} - -func (noopBackingFile) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) { - return dsts.NumBytes(), nil -} - -func (noopBackingFile) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) { - return srcs.NumBytes(), nil -} - -func (noopBackingFile) SetMaskedAttributes(context.Context, fs.AttrMask, fs.UnstableAttr, bool) error { - return nil -} - -func (noopBackingFile) Sync(context.Context) error { - return nil -} - -func (noopBackingFile) FD() int { - return -1 -} - -func (noopBackingFile) Allocate(ctx context.Context, offset int64, length int64) error { - return nil -} - -func TestSetPermissions(t *testing.T) { - ctx := contexttest.Context(t) - - uattr := fs.WithCurrentTime(ctx, fs.UnstableAttr{ - Perms: fs.FilePermsFromMode(0444), - }) - iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, CachingInodeOperationsOptions{}) - defer iops.Release() - - perms := fs.FilePermsFromMode(0777) - if !iops.SetPermissions(ctx, nil, perms) { - t.Fatalf("SetPermissions failed, want success") - } - - // Did permissions change? - if iops.attr.Perms != perms { - t.Fatalf("got perms +%v, want +%v", iops.attr.Perms, perms) - } - - // Did status change time change? - if !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("got status change time not dirty, want dirty") - } - if iops.attr.StatusChangeTime.Equal(uattr.StatusChangeTime) { - t.Fatalf("got status change time unchanged") - } -} - -func TestSetTimestamps(t *testing.T) { - ctx := contexttest.Context(t) - for _, test := range []struct { - desc string - ts fs.TimeSpec - wantChanged fs.AttrMask - }{ - { - desc: "noop", - ts: fs.TimeSpec{ - ATimeOmit: true, - MTimeOmit: true, - }, - wantChanged: fs.AttrMask{}, - }, - { - desc: "access time only", - ts: fs.TimeSpec{ - ATime: ktime.NowFromContext(ctx), - MTimeOmit: true, - }, - wantChanged: fs.AttrMask{ - AccessTime: true, - }, - }, - { - desc: "modification time only", - ts: fs.TimeSpec{ - ATimeOmit: true, - MTime: ktime.NowFromContext(ctx), - }, - wantChanged: fs.AttrMask{ - ModificationTime: true, - }, - }, - { - desc: "access and modification time", - ts: fs.TimeSpec{ - ATime: ktime.NowFromContext(ctx), - MTime: ktime.NowFromContext(ctx), - }, - wantChanged: fs.AttrMask{ - AccessTime: true, - ModificationTime: true, - }, - }, - { - desc: "system time access and modification time", - ts: fs.TimeSpec{ - ATimeSetSystemTime: true, - MTimeSetSystemTime: true, - }, - wantChanged: fs.AttrMask{ - AccessTime: true, - ModificationTime: true, - }, - }, - } { - t.Run(test.desc, func(t *testing.T) { - ctx := contexttest.Context(t) - - epoch := ktime.ZeroTime - uattr := fs.UnstableAttr{ - AccessTime: epoch, - ModificationTime: epoch, - StatusChangeTime: epoch, - } - iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, CachingInodeOperationsOptions{}) - defer iops.Release() - - if err := iops.SetTimestamps(ctx, nil, test.ts); err != nil { - t.Fatalf("SetTimestamps got error %v, want nil", err) - } - if test.wantChanged.AccessTime { - if !iops.attr.AccessTime.After(uattr.AccessTime) { - t.Fatalf("diritied access time did not advance, want %v > %v", iops.attr.AccessTime, uattr.AccessTime) - } - if !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("dirty access time requires dirty status change time") - } - if !iops.attr.StatusChangeTime.After(uattr.StatusChangeTime) { - t.Fatalf("dirtied status change time did not advance") - } - } - if test.wantChanged.ModificationTime { - if !iops.attr.ModificationTime.After(uattr.ModificationTime) { - t.Fatalf("diritied modification time did not advance") - } - if !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("dirty modification time requires dirty status change time") - } - if !iops.attr.StatusChangeTime.After(uattr.StatusChangeTime) { - t.Fatalf("dirtied status change time did not advance") - } - } - }) - } -} - -func TestTruncate(t *testing.T) { - ctx := contexttest.Context(t) - - uattr := fs.UnstableAttr{ - Size: 0, - } - iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, CachingInodeOperationsOptions{}) - defer iops.Release() - - if err := iops.Truncate(ctx, nil, uattr.Size); err != nil { - t.Fatalf("Truncate got error %v, want nil", err) - } - var size int64 = 4096 - if err := iops.Truncate(ctx, nil, size); err != nil { - t.Fatalf("Truncate got error %v, want nil", err) - } - if iops.attr.Size != size { - t.Fatalf("Truncate got %d, want %d", iops.attr.Size, size) - } - if !iops.dirtyAttr.ModificationTime || !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("Truncate did not dirty modification and status change time") - } - if !iops.attr.ModificationTime.After(uattr.ModificationTime) { - t.Fatalf("dirtied modification time did not change") - } - if !iops.attr.StatusChangeTime.After(uattr.StatusChangeTime) { - t.Fatalf("dirtied status change time did not change") - } -} - -type sliceBackingFile struct { - data []byte -} - -func newSliceBackingFile(data []byte) *sliceBackingFile { - return &sliceBackingFile{data} -} - -func (f *sliceBackingFile) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) { - r := safemem.BlockSeqReader{safemem.BlockSeqOf(safemem.BlockFromSafeSlice(f.data)).DropFirst64(offset)} - return r.ReadToBlocks(dsts) -} - -func (f *sliceBackingFile) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) { - w := safemem.BlockSeqWriter{safemem.BlockSeqOf(safemem.BlockFromSafeSlice(f.data)).DropFirst64(offset)} - return w.WriteFromBlocks(srcs) -} - -func (*sliceBackingFile) SetMaskedAttributes(context.Context, fs.AttrMask, fs.UnstableAttr, bool) error { - return nil -} - -func (*sliceBackingFile) Sync(context.Context) error { - return nil -} - -func (*sliceBackingFile) FD() int { - return -1 -} - -func (f *sliceBackingFile) Allocate(ctx context.Context, offset int64, length int64) error { - return linuxerr.EOPNOTSUPP -} - -type noopMappingSpace struct{} - -// Invalidate implements memmap.MappingSpace.Invalidate. -func (noopMappingSpace) Invalidate(ar hostarch.AddrRange, opts memmap.InvalidateOpts) { -} - -func anonInode(ctx context.Context) *fs.Inode { - return fs.NewInode(ctx, &SimpleFileInode{ - InodeSimpleAttributes: NewInodeSimpleAttributes(ctx, fs.FileOwnerFromContext(ctx), fs.FilePermissions{ - User: fs.PermMask{Read: true, Write: true}, - }, 0), - }, fs.NewPseudoMountSource(ctx), fs.StableAttr{ - Type: fs.Anonymous, - BlockSize: hostarch.PageSize, - }) -} - -func pagesOf(bs ...byte) []byte { - buf := make([]byte, 0, len(bs)*hostarch.PageSize) - for _, b := range bs { - buf = append(buf, bytes.Repeat([]byte{b}, hostarch.PageSize)...) - } - return buf -} - -func TestRead(t *testing.T) { - ctx := contexttest.Context(t) - - // Construct a 3-page file. - buf := pagesOf('a', 'b', 'c') - file := fs.NewFile(ctx, fs.NewDirent(ctx, anonInode(ctx), "anon"), fs.FileFlags{}, nil) - uattr := fs.UnstableAttr{ - Size: int64(len(buf)), - } - iops := NewCachingInodeOperations(ctx, newSliceBackingFile(buf), uattr, CachingInodeOperationsOptions{}) - defer iops.Release() - - // Expect the cache to be initially empty. - if cached := iops.cache.Span(); cached != 0 { - t.Errorf("Span got %d, want 0", cached) - } - - // Create a memory mapping of the second page (as CachingInodeOperations - // expects to only cache mapped pages), then call Translate to force it to - // be cached. - var ms noopMappingSpace - ar := hostarch.AddrRange{hostarch.PageSize, 2 * hostarch.PageSize} - if err := iops.AddMapping(ctx, ms, ar, hostarch.PageSize, true); err != nil { - t.Fatalf("AddMapping got %v, want nil", err) - } - mr := memmap.MappableRange{hostarch.PageSize, 2 * hostarch.PageSize} - if _, err := iops.Translate(ctx, mr, mr, hostarch.Read); err != nil { - t.Fatalf("Translate got %v, want nil", err) - } - if cached := iops.cache.Span(); cached != hostarch.PageSize { - t.Errorf("SpanRange got %d, want %d", cached, hostarch.PageSize) - } - - // Try to read 4 pages. The first and third pages should be read directly - // from the "file", the second page should be read from the cache, and only - // 3 pages (the size of the file) should be readable. - rbuf := make([]byte, 4*hostarch.PageSize) - dst := usermem.BytesIOSequence(rbuf) - n, err := iops.Read(ctx, file, dst, 0) - if n != 3*hostarch.PageSize || (err != nil && err != io.EOF) { - t.Fatalf("Read got (%d, %v), want (%d, nil or EOF)", n, err, 3*hostarch.PageSize) - } - rbuf = rbuf[:3*hostarch.PageSize] - - // Did we get the bytes we expect? - if !bytes.Equal(rbuf, buf) { - t.Errorf("Read back bytes %v, want %v", rbuf, buf) - } - - // Delete the memory mapping before iops.Release(). The cached page will - // either be evicted by ctx's pgalloc.MemoryFile, or dropped by - // iops.Release(). - iops.RemoveMapping(ctx, ms, ar, hostarch.PageSize, true) -} - -func TestWrite(t *testing.T) { - ctx := contexttest.Context(t) - - // Construct a 4-page file. - buf := pagesOf('a', 'b', 'c', 'd') - orig := append([]byte(nil), buf...) - inode := anonInode(ctx) - uattr := fs.UnstableAttr{ - Size: int64(len(buf)), - } - iops := NewCachingInodeOperations(ctx, newSliceBackingFile(buf), uattr, CachingInodeOperationsOptions{}) - defer iops.Release() - - // Expect the cache to be initially empty. - if cached := iops.cache.Span(); cached != 0 { - t.Errorf("Span got %d, want 0", cached) - } - - // Create a memory mapping of the second and third pages (as - // CachingInodeOperations expects to only cache mapped pages), then call - // Translate to force them to be cached. - var ms noopMappingSpace - ar := hostarch.AddrRange{hostarch.PageSize, 3 * hostarch.PageSize} - if err := iops.AddMapping(ctx, ms, ar, hostarch.PageSize, true); err != nil { - t.Fatalf("AddMapping got %v, want nil", err) - } - defer iops.RemoveMapping(ctx, ms, ar, hostarch.PageSize, true) - mr := memmap.MappableRange{hostarch.PageSize, 3 * hostarch.PageSize} - if _, err := iops.Translate(ctx, mr, mr, hostarch.Read); err != nil { - t.Fatalf("Translate got %v, want nil", err) - } - if cached := iops.cache.Span(); cached != 2*hostarch.PageSize { - t.Errorf("SpanRange got %d, want %d", cached, 2*hostarch.PageSize) - } - - // Write to the first 2 pages. - wbuf := pagesOf('e', 'f') - src := usermem.BytesIOSequence(wbuf) - n, err := iops.Write(ctx, src, 0) - if n != 2*hostarch.PageSize || err != nil { - t.Fatalf("Write got (%d, %v), want (%d, nil)", n, err, 2*hostarch.PageSize) - } - - // The first page should have been written directly, since it was not cached. - want := append([]byte(nil), orig...) - copy(want, pagesOf('e')) - if !bytes.Equal(buf, want) { - t.Errorf("File contents are %v, want %v", buf, want) - } - - // Sync back to the "backing file". - if err := iops.WriteOut(ctx, inode); err != nil { - t.Errorf("Sync got %v, want nil", err) - } - - // Now the second page should have been written as well. - copy(want[hostarch.PageSize:], pagesOf('f')) - if !bytes.Equal(buf, want) { - t.Errorf("File contents are %v, want %v", buf, want) - } -} diff --git a/pkg/sentry/fs/g3doc/.gitignore b/pkg/sentry/fs/g3doc/.gitignore deleted file mode 100644 index 2d19fc766..000000000 --- a/pkg/sentry/fs/g3doc/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.html diff --git a/pkg/sentry/fs/g3doc/fuse.md b/pkg/sentry/fs/g3doc/fuse.md deleted file mode 100644 index 05e043583..000000000 --- a/pkg/sentry/fs/g3doc/fuse.md +++ /dev/null @@ -1,360 +0,0 @@ -# Foreword - -This document describes an on-going project to support FUSE filesystems within -the sentry. This is intended to become the final documentation for this -subsystem, and is therefore written in the past tense. However FUSE support is -currently incomplete and the document will be updated as things progress. - -# FUSE: Filesystem in Userspace - -The sentry supports dispatching filesystem operations to a FUSE server, allowing -FUSE filesystem to be used with a sandbox. - -## Overview - -FUSE has two main components: - -1. A client kernel driver (canonically `fuse.ko` in Linux), which forwards - filesystem operations (usually initiated by syscalls) to the server. - -2. A server, which is a userspace daemon that implements the actual filesystem. - -The sentry implements the client component, which allows a server daemon running -within the sandbox to implement a filesystem within the sandbox. - -A FUSE filesystem is initialized with `mount(2)`, typically with the help of a -utility like `fusermount(1)`. Various mount options exist for establishing -ownership and access permissions on the filesystem, but the most important mount -option is a file descriptor used to establish communication between the client -and server. - -The FUSE device FD is obtained by opening `/dev/fuse`. During regular operation, -the client and server use the FUSE protocol described in `fuse(4)` to service -filesystem operations. See the "Protocol" section below for more information -about this protocol. The core of the sentry support for FUSE is the client-side -implementation of this protocol. - -## FUSE in the Sentry - -The sentry's FUSE client targets VFS2 and has the following components: - -- An implementation of `/dev/fuse`. - -- A VFS2 filesystem for mapping syscalls to FUSE ops. Since we're targeting - VFS2, one point of contention may be the lack of inodes in VFS2. We can - tentatively implement a kernfs-based filesystem to bridge the gap in APIs. - The kernfs base functionality can serve the role of the Linux inode cache - and, the filesystem can map VFS2 syscalls to kernfs inode operations; see - the `kernfs.Inode` interface. - -The FUSE protocol lends itself well to marshaling with `go_marshal`. The various -request and response packets can be defined in the ABI package and converted to -and from the wire format using `go_marshal`. - -### Design Goals - -- While filesystem performance is always important, the sentry's FUSE support - is primarily concerned with compatibility, with performance as a secondary - concern. - -- Avoiding deadlocks from a hung server daemon. - -- Consider the potential for denial of service from a malicious server daemon. - Protecting itself from userspace is already a design goal for the sentry, - but needs additional consideration for FUSE. Normally, an operating system - doesn't rely on userspace to make progress with filesystem operations. Since - this changes with FUSE, it opens up the possibility of creating a chain of - dependencies controlled by userspace, which could affect an entire sandbox. - For example: a FUSE op can block a syscall, which could be holding a - subsystem lock, which can then block another task goroutine. - -### Milestones - -Below are some broad goals to aim for while implementing FUSE in the sentry. -Many FUSE ops can be grouped into broad categories of functionality, and most -ops can be implemented in parallel. - -#### Minimal client that can mount a trivial FUSE filesystem. - -- Implement `/dev/fuse` - a character device used to establish an FD for - communication between the sentry and the server daemon. - -- Implement basic FUSE ops like `FUSE_INIT`. - -#### Read-only mount with basic file operations - -- Implement the majority of file, directory and file descriptor FUSE ops. For - this milestone, we can skip uncommon or complex operations like mmap, mknod, - file locking, poll, and extended attributes. We can stub these out along - with any ops that modify the filesystem. The exact list of required ops are - to be determined, but the goal is to mount a real filesystem as read-only, - and be able to read contents from the filesystem in the sentry. - -#### Full read-write support - -- Implement the remaining FUSE ops and decide if we can omit rarely used - operations like ioctl. - -### Design Details - -#### Lifecycle for a FUSE Request - -- User invokes a syscall -- Sentry prepares corresponding request - - If FUSE device is available - - Write the request in binary - - If FUSE device is full - - Kernel task blocked until available -- Sentry notifies the readers of fuse device that it's ready for read -- FUSE daemon reads the request and processes it -- Sentry waits until a reply is written to the FUSE device - - but returns directly for async requests -- FUSE daemon writes to the fuse device -- Sentry processes the reply - - For sync requests, unblock blocked kernel task - - For async requests, execute pre-specified callback if any -- Sentry returns the syscall to the user - -#### Channels and Queues for Requests in Different Stages - -`connection.initializedChan` - -- a channel that the requests issued before connection initialization blocks - on. - -`fd.queue` - -- a queue of requests that haven’t been read by the FUSE daemon yet. - -`fd.completions` - -- a map of the requests that have been prepared but not yet received a - response, including the ones on the `fd.queue`. - -`fd.waitQueue` - -- a queue of waiters that is waiting for the fuse device fd to be available, - such as the FUSE daemon. - -`fd.fullQueueCh` - -- a channel that the kernel task will be blocked on when the fd is not - available. - -#### Basic I/O Implementation - -Currently we have implemented basic functionalities of read and write for our -FUSE. We describe the design and ways to improve it here: - -##### Basic FUSE Read - -The vfs2 expects implementations of `vfs.FileDescriptionImpl.Read()` and -`vfs.FileDescriptionImpl.PRead()`. When a syscall is made, it will eventually -reach our implementation of those interface functions located at -`pkg/sentry/fsimpl/fuse/regular_file.go` for regular files. - -After validation checks of the input, sentry sends `FUSE_READ` requests to the -FUSE daemon. The FUSE daemon returns data after the `fuse_out_header` as the -responses. For the first version, we create a copy in kernel memory of those -data. They are represented as a byte slice in the marshalled struct. This -happens as a common process for all the FUSE responses at this moment at -`pkg/sentry/fsimpl/fuse/dev.go:writeLocked()`. We then directly copy from this -intermediate buffer to the input buffer provided by the read syscall. - -There is an extra requirement for FUSE: When mounting the FUSE fs, the mounter -or the FUSE daemon can specify a `max_read` or a `max_pages` parameter. They are -the upperbound of the bytes to read in each `FUSE_READ` request. We implemented -the code to handle the fragmented reads. - -To improve the performance: ideally we should have buffer cache to copy those -data from the responses of FUSE daemon into, as is also the design of several -other existing file system implementations for sentry, instead of a single-use -temporary buffer. Directly mapping the memory of one process to another could -also boost the performance, but to keep them isolated, we did not choose to do -so. - -##### Basic FUSE Write - -The vfs2 invokes implementations of `vfs.FileDescriptionImpl.Write()` and -`vfs.FileDescriptionImpl.PWrite()` on the regular file descriptor of FUSE when a -user makes write(2) and pwrite(2) syscall. - -For valid writes, sentry sends the bytes to write after a `FUSE_WRITE` header -(can be regarded as a request with 2 payloads) to the FUSE daemon. For the first -version, we allocate a buffer inside kernel memory to store the bytes from the -user, and copy directly from that buffer to the memory of FUSE daemon. This -happens at `pkg/sentry/fsimpl/fuse/dev.go:readLocked()` - -The parameters `max_write` and `max_pages` restrict the number of bytes in one -`FUSE_WRITE`. There are code handling fragmented writes in current -implementation. - -To have better performance: the extra copy created to store the bytes to write -can be replaced by the buffer cache as well. - -# Appendix - -## FUSE Protocol - -The FUSE protocol is a request-response protocol. All requests are initiated by -the client. The wire-format for the protocol is raw C structs serialized to -memory. - -All FUSE requests begin with the following request header: - -```c -struct fuse_in_header { - uint32_t len; // Length of the request, including this header. - uint32_t opcode; // Requested operation. - uint64_t unique; // A unique identifier for this request. - uint64_t nodeid; // ID of the filesystem object being operated on. - uint32_t uid; // UID of the requesting process. - uint32_t gid; // GID of the requesting process. - uint32_t pid; // PID of the requesting process. - uint32_t padding; -}; -``` - -The request is then followed by a payload specific to the `opcode`. - -All responses begin with this response header: - -```c -struct fuse_out_header { - uint32_t len; // Length of the response, including this header. - int32_t error; // Status of the request, 0 if success. - uint64_t unique; // The unique identifier from the corresponding request. -}; -``` - -The response payload also depends on the request `opcode`. If `error != 0`, the -response payload must be empty. - -### Operations - -The following is a list of all FUSE operations used in `fuse_in_header.opcode` -as of Linux v4.4, and a brief description of their purpose. These are defined in -`uapi/linux/fuse.h`. Many of these have a corresponding request and response -payload struct; `fuse(4)` has details for some of these. We also note how these -operations map to the sentry virtual filesystem. - -#### FUSE meta-operations - -These operations are specific to FUSE and don't have a corresponding action in a -generic filesystem. - -- `FUSE_INIT`: This operation initializes a new FUSE filesystem, and is the - first message sent by the client after mount. This is used for version and - feature negotiation. This is related to `mount(2)`. -- `FUSE_DESTROY`: Teardown a FUSE filesystem, related to `unmount(2)`. -- `FUSE_INTERRUPT`: Interrupts an in-flight operation, specified by the - `fuse_in_header.unique` value provided in the corresponding request header. - The client can send at most one of these per request, and will enter an - uninterruptible wait for a reply. The server is expected to reply promptly. -- `FUSE_FORGET`: A hint to the server that server should evict the indicate - node from any caches. This is wired up to `(struct - super_operations).evict_inode` in Linux, which is in turned hooked as the - inode cache shrinker which is typically triggered by system memory pressure. -- `FUSE_BATCH_FORGET`: Batch version of `FUSE_FORGET`. - -#### Filesystem Syscalls - -These FUSE ops map directly to an equivalent filesystem syscall, or family of -syscalls. The relevant syscalls have a similar name to the operation, unless -otherwise noted. - -Node creation: - -- `FUSE_MKNOD` -- `FUSE_MKDIR` -- `FUSE_CREATE`: This is equivalent to `open(2)` and `creat(2)`, which - atomically creates and opens a node. - -Node attributes and extended attributes: - -- `FUSE_GETATTR` -- `FUSE_SETATTR` -- `FUSE_SETXATTR` -- `FUSE_GETXATTR` -- `FUSE_LISTXATTR` -- `FUSE_REMOVEXATTR` - -Node link manipulation: - -- `FUSE_READLINK` -- `FUSE_LINK` -- `FUSE_SYMLINK` -- `FUSE_UNLINK` - -Directory operations: - -- `FUSE_RMDIR` -- `FUSE_RENAME` -- `FUSE_RENAME2` -- `FUSE_OPENDIR`: `open(2)` for directories. -- `FUSE_RELEASEDIR`: `close(2)` for directories. -- `FUSE_READDIR` -- `FUSE_READDIRPLUS` -- `FUSE_FSYNCDIR`: `fsync(2)` for directories. -- `FUSE_LOOKUP`: Establishes a unique identifier for a FS node. This is - reminiscent of `VirtualFilesystem.GetDentryAt` in that it resolves a path - component to a node. However the returned identifier is opaque to the - client. The server must remember this mapping, as this is how the client - will reference the node in the future. - -File operations: - -- `FUSE_OPEN`: `open(2)` for files. -- `FUSE_RELEASE`: `close(2)` for files. -- `FUSE_FSYNC` -- `FUSE_FALLOCATE` -- `FUSE_SETUPMAPPING`: Creates a memory map on a file for `mmap(2)`. -- `FUSE_REMOVEMAPPING`: Removes a memory map for `munmap(2)`. - -File locking: - -- `FUSE_GETLK` -- `FUSE_SETLK` -- `FUSE_SETLKW` -- `FUSE_COPY_FILE_RANGE` - -File descriptor operations: - -- `FUSE_IOCTL` -- `FUSE_POLL` -- `FUSE_LSEEK` - -Filesystem operations: - -- `FUSE_STATFS` - -#### Permissions - -- `FUSE_ACCESS` is used to check if a node is accessible, as part of many - syscall implementations. Maps to `vfs.FilesystemImpl.AccessAt` in the - sentry. - -#### I/O Operations - -These ops are used to read and write file pages. They're used to implement both -I/O syscalls like `read(2)`, `write(2)` and `mmap(2)`. - -- `FUSE_READ` -- `FUSE_WRITE` - -#### Miscellaneous - -- `FUSE_FLUSH`: Used by the client to indicate when a file descriptor is - closed. Distinct from `FUSE_FSYNC`, which corresponds to an `fsync(2)` - syscall from the user. Maps to `vfs.FileDescriptorImpl.Release` in the - sentry. -- `FUSE_BMAP`: Old address space API for block defrag. Probably not needed. -- `FUSE_NOTIFY_REPLY`: [TODO: what does this do?] - -# References - -- [fuse(4) Linux manual page](https://www.man7.org/linux/man-pages/man4/fuse.4.html) -- [Linux kernel FUSE documentation](https://www.kernel.org/doc/html/latest/filesystems/fuse.html) -- [The reference implementation of the Linux FUSE (Filesystem in Userspace) - interface](https://github.com/libfuse/libfuse) -- [The kernel interface of FUSE](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fuse.h) diff --git a/pkg/sentry/fs/g3doc/inotify.md b/pkg/sentry/fs/g3doc/inotify.md deleted file mode 100644 index 85063d4e6..000000000 --- a/pkg/sentry/fs/g3doc/inotify.md +++ /dev/null @@ -1,122 +0,0 @@ -# Inotify - -Inotify implements the like-named filesystem event notification system for the -sentry, see `inotify(7)`. - -## Architecture - -For the most part, the sentry implementation of inotify mirrors the Linux -architecture. Inotify instances (i.e. the fd returned by inotify_init(2)) are -backed by a pseudo-filesystem. Events are generated from various places in the -sentry, including the [syscall layer][syscall_dir], the [vfs layer][dirent] and -the [process fd table][fd_table]. Watches are stored in inodes and generated -events are queued to the inotify instance owning the watches for delivery to the -user. - -## Objects - -Here is a brief description of the existing and new objects involved in the -sentry inotify mechanism, and how they interact: - -### [`fs.Inotify`][inotify] - -- An inotify instances, created by inotify_init(2)/inotify_init1(2). -- The inotify fd has a `fs.Dirent`, supports filesystem syscalls to read - events. -- Has multiple `fs.Watch`es, with at most one watch per target inode, per - inotify instance. -- Has an instance `id` which is globally unique. This is *not* the fd number - for this instance, since the fd can be duped. This `id` is not externally - visible. - -### [`fs.Watch`][watch] - -- An inotify watch, created/deleted by - inotify_add_watch(2)/inotify_rm_watch(2). -- Owned by an `fs.Inotify` instance, each watch keeps a pointer to the - `owner`. -- Associated with a single `fs.Inode`, which is the watch `target`. While the - watch is active, it indirectly pins `target` to memory. See the "Reference - Model" section for a detailed explanation. -- Filesystem operations on `target` generate `fs.Event`s. - -### [`fs.Event`][event] - -- A simple struct encapsulating all the fields for an inotify event. -- Generated by `fs.Watch`es and forwarded to the watches' `owner`s. -- Serialized to the user during read(2) syscalls on the associated - `fs.Inotify`'s fd. - -### [`fs.Dirent`][dirent] - -- Many inotify events are generated inside dirent methods. Events are - generated in the dirent methods rather than `fs.Inode` methods because some - events carry the name of the subject node, and node names are generally - unavailable in an `fs.Inode`. -- Dirents do not directly contain state for any watches. Instead, they forward - notifications to the underlying `fs.Inode`. - -### [`fs.Inode`][inode] - -- Interacts with inotify through `fs.Watch`es. -- Inodes contain a map of all active `fs.Watch`es on them. -- An `fs.Inotify` instance can have at most one `fs.Watch` per inode. - `fs.Watch`es on an inode are indexed by their `owner`'s `id`. -- All inotify logic is encapsulated in the [`Watches`][inode_watches] struct - in an inode. Logically, `Watches` is the set of inotify watches on the - inode. - -## Reference Model - -The sentry inotify implementation has a complex reference model. An inotify -watch observes a single inode. For efficient lookup, the state for a watch is -stored directly on the target inode. This state needs to be persistent for the -lifetime of watch. Unlike usual filesystem metadata, the watch state has no -"on-disk" representation, so they cannot be reconstructed by the filesystem if -the inode is flushed from memory. This effectively means we need to keep any -inodes with actives watches pinned to memory. - -We can't just hold an extra ref on the inode to pin it to memory because some -filesystems (such as gofer-based filesystems) don't have persistent inodes. In -such a filesystem, if we just pin the inode, nothing prevents the enclosing -dirent from being GCed. Once the dirent is GCed, the pinned inode is -unreachable -- these filesystems generate a new inode by re-reading the node -state on the next walk. Incidentally, hardlinks also don't work on these -filesystems for this reason. - -To prevent the above scenario, when a new watch is added on an inode, we *pin* -the dirent we used to reach the inode. Note that due to hardlinks, this dirent -may not be the only dirent pointing to the inode. Attempting to set an inotify -watch via multiple hardlinks to the same file results in the same watch being -returned for both links. However, for each new dirent we use to reach the same -inode, we add a new pin. We need a new pin for each new dirent used to reach the -inode because we have no guarantees about the deletion order of the different -links to the inode. - -## Lock Ordering - -There are 4 locks related to the inotify implementation: - -- `Inotify.mu`: the inotify instance lock. -- `Inotify.evMu`: the inotify event queue lock. -- `Watch.mu`: the watch lock, used to protect pins. -- `fs.Watches.mu`: the inode watch set mu, used to protect the collection of - watches on the inode. - -The correct lock ordering for inotify code is: - -`Inotify.mu` -> `fs.Watches.mu` -> `Watch.mu` -> `Inotify.evMu`. - -We need a distinct lock for the event queue because by the time a goroutine -attempts to queue a new event, it is already holding `fs.Watches.mu`. If we used -`Inotify.mu` to also protect the event queue, this would violate the above lock -ordering. - -[dirent]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/dirent.go -[event]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify_event.go -[fd_table]: https://github.com/google/gvisor/blob/master/pkg/sentry/kernel/fd_table.go -[inode]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inode.go -[inode_watches]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inode_inotify.go -[inotify]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify.go -[syscall_dir]: https://github.com/google/gvisor/blob/master/pkg/sentry/syscalls/linux/ -[watch]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify_watch.go diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD deleted file mode 100644 index c08301d19..000000000 --- a/pkg/sentry/fs/gofer/BUILD +++ /dev/null @@ -1,72 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "gofer", - srcs = [ - "attr.go", - "cache_policy.go", - "context_file.go", - "device.go", - "fifo.go", - "file.go", - "file_state.go", - "fs.go", - "handles.go", - "inode.go", - "inode_state.go", - "path.go", - "session.go", - "session_state.go", - "socket.go", - "util.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fd", - "//pkg/hostarch", - "//pkg/log", - "//pkg/metric", - "//pkg/p9", - "//pkg/refs", - "//pkg/safemem", - "//pkg/secio", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fdpipe", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/host", - "//pkg/sentry/fsmetric", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/unix/transport", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/unet", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "gofer_test", - size = "small", - srcs = ["gofer_test.go"], - library = ":gofer", - deps = [ - "//pkg/context", - "//pkg/p9", - "//pkg/p9/p9test", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/fs/gofer/gofer_state_autogen.go b/pkg/sentry/fs/gofer/gofer_state_autogen.go new file mode 100644 index 000000000..d22ecc3c2 --- /dev/null +++ b/pkg/sentry/fs/gofer/gofer_state_autogen.go @@ -0,0 +1,271 @@ +// automatically generated by stateify. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (i *fifo) StateTypeName() string { + return "pkg/sentry/fs/gofer.fifo" +} + +func (i *fifo) StateFields() []string { + return []string{ + "InodeOperations", + "fileIops", + } +} + +func (i *fifo) beforeSave() {} + +// +checklocksignore +func (i *fifo) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeOperations) + stateSinkObject.Save(1, &i.fileIops) +} + +func (i *fifo) afterLoad() {} + +// +checklocksignore +func (i *fifo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeOperations) + stateSourceObject.Load(1, &i.fileIops) +} + +func (f *fileOperations) StateTypeName() string { + return "pkg/sentry/fs/gofer.fileOperations" +} + +func (f *fileOperations) StateFields() []string { + return []string{ + "inodeOperations", + "dirCursor", + "flags", + } +} + +func (f *fileOperations) beforeSave() {} + +// +checklocksignore +func (f *fileOperations) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.inodeOperations) + stateSinkObject.Save(1, &f.dirCursor) + stateSinkObject.Save(2, &f.flags) +} + +// +checklocksignore +func (f *fileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &f.inodeOperations) + stateSourceObject.Load(1, &f.dirCursor) + stateSourceObject.LoadWait(2, &f.flags) + stateSourceObject.AfterLoad(f.afterLoad) +} + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/gofer.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +// +checklocksignore +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +// +checklocksignore +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (i *inodeOperations) StateTypeName() string { + return "pkg/sentry/fs/gofer.inodeOperations" +} + +func (i *inodeOperations) StateFields() []string { + return []string{ + "fileState", + "cachingInodeOps", + } +} + +func (i *inodeOperations) beforeSave() {} + +// +checklocksignore +func (i *inodeOperations) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.fileState) + stateSinkObject.Save(1, &i.cachingInodeOps) +} + +func (i *inodeOperations) afterLoad() {} + +// +checklocksignore +func (i *inodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &i.fileState) + stateSourceObject.Load(1, &i.cachingInodeOps) +} + +func (i *inodeFileState) StateTypeName() string { + return "pkg/sentry/fs/gofer.inodeFileState" +} + +func (i *inodeFileState) StateFields() []string { + return []string{ + "s", + "sattr", + "loading", + "savedUAttr", + "hostMappable", + } +} + +// +checklocksignore +func (i *inodeFileState) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + var loadingValue struct{} = i.saveLoading() + stateSinkObject.SaveValue(2, loadingValue) + stateSinkObject.Save(0, &i.s) + stateSinkObject.Save(1, &i.sattr) + stateSinkObject.Save(3, &i.savedUAttr) + stateSinkObject.Save(4, &i.hostMappable) +} + +// +checklocksignore +func (i *inodeFileState) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &i.s) + stateSourceObject.LoadWait(1, &i.sattr) + stateSourceObject.Load(3, &i.savedUAttr) + stateSourceObject.Load(4, &i.hostMappable) + stateSourceObject.LoadValue(2, new(struct{}), func(y interface{}) { i.loadLoading(y.(struct{})) }) + stateSourceObject.AfterLoad(i.afterLoad) +} + +func (l *overrideInfo) StateTypeName() string { + return "pkg/sentry/fs/gofer.overrideInfo" +} + +func (l *overrideInfo) StateFields() []string { + return []string{ + "dirent", + "endpoint", + "inode", + } +} + +func (l *overrideInfo) beforeSave() {} + +// +checklocksignore +func (l *overrideInfo) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.dirent) + stateSinkObject.Save(1, &l.endpoint) + stateSinkObject.Save(2, &l.inode) +} + +func (l *overrideInfo) afterLoad() {} + +// +checklocksignore +func (l *overrideInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.dirent) + stateSourceObject.Load(1, &l.endpoint) + stateSourceObject.Load(2, &l.inode) +} + +func (e *overrideMaps) StateTypeName() string { + return "pkg/sentry/fs/gofer.overrideMaps" +} + +func (e *overrideMaps) StateFields() []string { + return []string{ + "pathMap", + } +} + +func (e *overrideMaps) beforeSave() {} + +// +checklocksignore +func (e *overrideMaps) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.pathMap) +} + +func (e *overrideMaps) afterLoad() {} + +// +checklocksignore +func (e *overrideMaps) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.pathMap) +} + +func (s *session) StateTypeName() string { + return "pkg/sentry/fs/gofer.session" +} + +func (s *session) StateFields() []string { + return []string{ + "AtomicRefCount", + "msize", + "version", + "cachePolicy", + "aname", + "superBlockFlags", + "limitHostFDTranslation", + "overlayfsStaleRead", + "connID", + "inodeMappings", + "mounter", + "overrides", + } +} + +// +checklocksignore +func (s *session) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.AtomicRefCount) + stateSinkObject.Save(1, &s.msize) + stateSinkObject.Save(2, &s.version) + stateSinkObject.Save(3, &s.cachePolicy) + stateSinkObject.Save(4, &s.aname) + stateSinkObject.Save(5, &s.superBlockFlags) + stateSinkObject.Save(6, &s.limitHostFDTranslation) + stateSinkObject.Save(7, &s.overlayfsStaleRead) + stateSinkObject.Save(8, &s.connID) + stateSinkObject.Save(9, &s.inodeMappings) + stateSinkObject.Save(10, &s.mounter) + stateSinkObject.Save(11, &s.overrides) +} + +// +checklocksignore +func (s *session) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.AtomicRefCount) + stateSourceObject.LoadWait(1, &s.msize) + stateSourceObject.LoadWait(2, &s.version) + stateSourceObject.LoadWait(3, &s.cachePolicy) + stateSourceObject.LoadWait(4, &s.aname) + stateSourceObject.LoadWait(5, &s.superBlockFlags) + stateSourceObject.Load(6, &s.limitHostFDTranslation) + stateSourceObject.Load(7, &s.overlayfsStaleRead) + stateSourceObject.LoadWait(8, &s.connID) + stateSourceObject.LoadWait(9, &s.inodeMappings) + stateSourceObject.LoadWait(10, &s.mounter) + stateSourceObject.LoadWait(11, &s.overrides) + stateSourceObject.AfterLoad(s.afterLoad) +} + +func init() { + state.Register((*fifo)(nil)) + state.Register((*fileOperations)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*inodeOperations)(nil)) + state.Register((*inodeFileState)(nil)) + state.Register((*overrideInfo)(nil)) + state.Register((*overrideMaps)(nil)) + state.Register((*session)(nil)) +} diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go deleted file mode 100644 index 546ee7d04..000000000 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gofer - -import ( - "fmt" - "testing" - "time" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/p9/p9test" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// rootTest runs a test with a p9 mock and an fs.InodeOperations created from -// the attached root directory. The root file will be closed and client -// disconnected, but additional files must be closed manually. -func rootTest(t *testing.T, name string, cp cachePolicy, fn func(context.Context, *p9test.Harness, *p9test.Mock, *fs.Inode)) { - t.Run(name, func(t *testing.T) { - h, c := p9test.NewHarness(t) - defer h.Finish() - - // Create a new root. Note that we pass an empty, but non-nil - // map here. This allows tests to extend the root children - // dynamically. - root := h.NewDirectory(map[string]p9test.Generator{})(nil) - - // Return this as the root. - h.Attacher.EXPECT().Attach().Return(root, nil).Times(1) - - // ... and open via the client. - rootFile, err := c.Attach("/") - if err != nil { - t.Fatalf("unable to attach: %v", err) - } - defer rootFile.Close() - - // Wrap an a session. - s := &session{ - mounter: fs.RootOwner, - cachePolicy: cp, - client: c, - } - - // ... and an INode, with only the mode being explicitly valid for now. - ctx := contexttest.Context(t) - sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{ - file: rootFile, - }, root.QID, p9.AttrMaskAll(), root.Attr) - m := fs.NewMountSource(ctx, s, &filesystem{}, fs.MountSourceFlags{}) - rootInode := fs.NewInode(ctx, rootInodeOperations, m, sattr) - - // Ensure that the cache is fully invalidated, so that any - // close actions actually take place before the full harness is - // torn down. - defer func() { - m.FlushDirentRefs() - - // Wait for all resources to be released, otherwise the - // operations may fail after we close the rootFile. - fs.AsyncBarrier() - }() - - // Execute the test. - fn(ctx, h, root, rootInode) - }) -} - -func TestLookup(t *testing.T) { - type lookupTest struct { - // Name of the test. - name string - - // Expected return value. - want error - } - - tests := []lookupTest{ - { - name: "mock Walk passes (function succeeds)", - want: nil, - }, - { - name: "mock Walk fails (function fails)", - want: unix.ENOENT, - }, - } - - const file = "file" // The walked target file. - - for _, test := range tests { - rootTest(t, test.name, cacheNone, func(ctx context.Context, h *p9test.Harness, rootFile *p9test.Mock, rootInode *fs.Inode) { - // Setup the appropriate result. - rootFile.WalkCallback = func() error { - return test.want - } - if test.want == nil { - // Set the contents of the root. We expect a - // normal file generator for ppp above. This is - // overriden by setting WalkErr in the mock. - rootFile.AddChild(file, h.NewFile()) - } - - // Call function. - dirent, err := rootInode.Lookup(ctx, file) - - // Unwrap the InodeOperations. - var newInodeOperations fs.InodeOperations - if dirent != nil { - if dirent.IsNegative() { - err = unix.ENOENT - } else { - newInodeOperations = dirent.Inode.InodeOperations - } - } - - // Check return values. - if err != test.want { - t.Errorf("Lookup got err %v, want %v", err, test.want) - } - if err == nil && newInodeOperations == nil { - t.Errorf("Lookup got non-nil err and non-nil node, wanted at least one non-nil") - } - }) - } -} - -func TestRevalidation(t *testing.T) { - type revalidationTest struct { - cachePolicy cachePolicy - - // Whether dirent should be reloaded before any modifications. - preModificationWantReload bool - - // Whether dirent should be reloaded after updating an unstable - // attribute on the remote fs. - postModificationWantReload bool - - // Whether dirent unstable attributes should be updated after - // updating an attribute on the remote fs. - postModificationWantUpdatedAttrs bool - - // Whether dirent should be reloaded after the remote has - // removed the file. - postRemovalWantReload bool - } - - tests := []revalidationTest{ - { - // Policy cacheNone causes Revalidate to always return - // true. - cachePolicy: cacheNone, - preModificationWantReload: true, - postModificationWantReload: true, - postModificationWantUpdatedAttrs: true, - postRemovalWantReload: true, - }, - { - // Policy cacheAll causes Revalidate to always return - // false. - cachePolicy: cacheAll, - preModificationWantReload: false, - postModificationWantReload: false, - postModificationWantUpdatedAttrs: false, - postRemovalWantReload: false, - }, - { - // Policy cacheAllWritethrough causes Revalidate to - // always return false. - cachePolicy: cacheAllWritethrough, - preModificationWantReload: false, - postModificationWantReload: false, - postModificationWantUpdatedAttrs: false, - postRemovalWantReload: false, - }, - { - // Policy cacheRemoteRevalidating causes Revalidate to - // return update cached unstable attrs, and returns - // true only when the remote inode itself has been - // removed or replaced. - cachePolicy: cacheRemoteRevalidating, - preModificationWantReload: false, - postModificationWantReload: false, - postModificationWantUpdatedAttrs: true, - postRemovalWantReload: true, - }, - } - - const file = "file" // The file walked below. - - for _, test := range tests { - name := fmt.Sprintf("cachepolicy=%s", test.cachePolicy) - rootTest(t, name, test.cachePolicy, func(ctx context.Context, h *p9test.Harness, rootFile *p9test.Mock, rootInode *fs.Inode) { - // Wrap in a dirent object. - rootDir := fs.NewDirent(ctx, rootInode, "root") - - // Create a mock file a child of the root. We save when - // this is generated, so that when the time changed, we - // can update the original entry. - var origMocks []*p9test.Mock - rootFile.AddChild(file, func(parent *p9test.Mock) *p9test.Mock { - // Regular a regular file that has a consistent - // path number. This might be used by - // validation so we don't change it. - m := h.NewMock(parent, 0, p9.Attr{ - Mode: p9.ModeRegular, - }) - origMocks = append(origMocks, m) - return m - }) - - // Do the walk. - dirent, err := rootDir.Walk(ctx, rootDir, file) - if err != nil { - t.Fatalf("Lookup failed: %v", err) - } - - // We must release the dirent, of the test will fail - // with a reference leak. This is tracked by p9test. - defer dirent.DecRef(ctx) - - // Walk again. Depending on the cache policy, we may - // get a new dirent. - newDirent, err := rootDir.Walk(ctx, rootDir, file) - if err != nil { - t.Fatalf("Lookup failed: %v", err) - } - if test.preModificationWantReload && dirent == newDirent { - t.Errorf("Lookup with cachePolicy=%s got old dirent %+v, wanted a new dirent", test.cachePolicy, dirent) - } - if !test.preModificationWantReload && dirent != newDirent { - t.Errorf("Lookup with cachePolicy=%s got new dirent %+v, wanted old dirent %+v", test.cachePolicy, newDirent, dirent) - } - newDirent.DecRef(ctx) // See above. - - // Modify the underlying mocked file's modification - // time for the next walk that occurs. - nowSeconds := time.Now().Unix() - rootFile.AddChild(file, func(parent *p9test.Mock) *p9test.Mock { - // Ensure that the path is the same as above, - // but we change only the modification time of - // the file. - return h.NewMock(parent, 0, p9.Attr{ - Mode: p9.ModeRegular, - MTimeSeconds: uint64(nowSeconds), - }) - }) - - // We also modify the original time, so that GetAttr - // behaves as expected for the caching case. - for _, m := range origMocks { - m.Attr.MTimeSeconds = uint64(nowSeconds) - } - - // Walk again. Depending on the cache policy, we may - // get a new dirent. - newDirent, err = rootDir.Walk(ctx, rootDir, file) - if err != nil { - t.Fatalf("Lookup failed: %v", err) - } - if test.postModificationWantReload && dirent == newDirent { - t.Errorf("Lookup with cachePolicy=%s got old dirent, wanted a new dirent", test.cachePolicy) - } - if !test.postModificationWantReload && dirent != newDirent { - t.Errorf("Lookup with cachePolicy=%s got new dirent, wanted old dirent", test.cachePolicy) - } - uattrs, err := newDirent.Inode.UnstableAttr(ctx) - if err != nil { - t.Fatalf("Error getting unstable attrs: %v", err) - } - gotModTimeSeconds := uattrs.ModificationTime.Seconds() - if test.postModificationWantUpdatedAttrs && gotModTimeSeconds != nowSeconds { - t.Fatalf("Lookup with cachePolicy=%s got new modification time %v, wanted %v", test.cachePolicy, gotModTimeSeconds, nowSeconds) - } - newDirent.DecRef(ctx) // See above. - - // Remove the file from the remote fs, subsequent walks - // should now fail to find anything. - rootFile.RemoveChild(file) - - // Walk again. Depending on the cache policy, we may - // get ENOENT. - newDirent, err = rootDir.Walk(ctx, rootDir, file) - if test.postRemovalWantReload && err == nil { - t.Errorf("Lookup with cachePolicy=%s got nil error, wanted ENOENT", test.cachePolicy) - } - if !test.postRemovalWantReload && (err != nil || dirent != newDirent) { - t.Errorf("Lookup with cachePolicy=%s got new dirent and error %v, wanted old dirent and nil error", test.cachePolicy, err) - } - if err == nil { - newDirent.DecRef(ctx) // See above. - } - }) - } -} diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD deleted file mode 100644 index 24fc6305c..000000000 --- a/pkg/sentry/fs/host/BUILD +++ /dev/null @@ -1,87 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "host", - srcs = [ - "control.go", - "descriptor.go", - "descriptor_state.go", - "device.go", - "file.go", - "host.go", - "inode.go", - "inode_state.go", - "ioctl_unsafe.go", - "socket.go", - "socket_iovec.go", - "socket_state.go", - "socket_unsafe.go", - "tty.go", - "util.go", - "util_amd64_unsafe.go", - "util_arm64_unsafe.go", - "util_unsafe.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/log", - "//pkg/marshal/primitive", - "//pkg/refs", - "//pkg/safemem", - "//pkg/secio", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/hostfd", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/unimpl", - "//pkg/sentry/uniqueid", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/unet", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "host_test", - size = "small", - srcs = [ - "descriptor_test.go", - "inode_test.go", - "socket_test.go", - "wait_test.go", - ], - library = ":host", - deps = [ - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/sentry/contexttest", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/unix/transport", - "//pkg/syserr", - "//pkg/tcpip", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/fs/host/descriptor_test.go b/pkg/sentry/fs/host/descriptor_test.go deleted file mode 100644 index cb809ab2d..000000000 --- a/pkg/sentry/fs/host/descriptor_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "io/ioutil" - "path/filepath" - "testing" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestDescriptorRelease(t *testing.T) { - for _, tc := range []struct { - name string - saveable bool - wouldBlock bool - }{ - {name: "all false"}, - {name: "saveable", saveable: true}, - {name: "wouldBlock", wouldBlock: true}, - } { - t.Run(tc.name, func(t *testing.T) { - dir, err := ioutil.TempDir("", "descriptor_test") - if err != nil { - t.Fatal("ioutil.TempDir() failed:", err) - } - - fd, err := unix.Open(filepath.Join(dir, "file"), unix.O_RDWR|unix.O_CREAT, 0666) - if err != nil { - t.Fatal("failed to open temp file:", err) - } - - // FD ownership is transferred to the descritor. - queue := &waiter.Queue{} - d, err := newDescriptor(fd, tc.saveable, tc.wouldBlock, queue) - if err != nil { - unix.Close(fd) - t.Fatalf("newDescriptor(%d, %t, %t, queue) failed, err: %v", fd, tc.saveable, tc.wouldBlock, err) - } - if tc.saveable { - if d.origFD < 0 { - t.Errorf("saveable descriptor must preserve origFD, desc: %+v", d) - } - } - if tc.wouldBlock { - if !fdnotifier.HasFD(int32(d.value)) { - t.Errorf("FD not registered with notifier, desc: %+v", d) - } - } - - oldVal := d.value - d.Release() - if d.value != -1 { - t.Errorf("d.value want: -1, got: %d", d.value) - } - if tc.wouldBlock { - if fdnotifier.HasFD(int32(oldVal)) { - t.Errorf("FD not unregistered with notifier, desc: %+v", d) - } - } - }) - } -} diff --git a/pkg/sentry/fs/host/host_amd64_unsafe_state_autogen.go b/pkg/sentry/fs/host/host_amd64_unsafe_state_autogen.go new file mode 100644 index 000000000..14cecc49f --- /dev/null +++ b/pkg/sentry/fs/host/host_amd64_unsafe_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build amd64 +// +build amd64 + +package host diff --git a/pkg/sentry/fs/host/host_arm64_unsafe_state_autogen.go b/pkg/sentry/fs/host/host_arm64_unsafe_state_autogen.go new file mode 100644 index 000000000..a714e619e --- /dev/null +++ b/pkg/sentry/fs/host/host_arm64_unsafe_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 +// +build arm64 + +package host diff --git a/pkg/sentry/fs/host/host_state_autogen.go b/pkg/sentry/fs/host/host_state_autogen.go new file mode 100644 index 000000000..b06247919 --- /dev/null +++ b/pkg/sentry/fs/host/host_state_autogen.go @@ -0,0 +1,220 @@ +// automatically generated by stateify. + +package host + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (d *descriptor) StateTypeName() string { + return "pkg/sentry/fs/host.descriptor" +} + +func (d *descriptor) StateFields() []string { + return []string{ + "origFD", + "wouldBlock", + } +} + +// +checklocksignore +func (d *descriptor) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.origFD) + stateSinkObject.Save(1, &d.wouldBlock) +} + +// +checklocksignore +func (d *descriptor) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.origFD) + stateSourceObject.Load(1, &d.wouldBlock) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (f *fileOperations) StateTypeName() string { + return "pkg/sentry/fs/host.fileOperations" +} + +func (f *fileOperations) StateFields() []string { + return []string{ + "iops", + "dirCursor", + } +} + +func (f *fileOperations) beforeSave() {} + +// +checklocksignore +func (f *fileOperations) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.iops) + stateSinkObject.Save(1, &f.dirCursor) +} + +func (f *fileOperations) afterLoad() {} + +// +checklocksignore +func (f *fileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &f.iops) + stateSourceObject.Load(1, &f.dirCursor) +} + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/host.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +// +checklocksignore +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +// +checklocksignore +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (i *inodeOperations) StateTypeName() string { + return "pkg/sentry/fs/host.inodeOperations" +} + +func (i *inodeOperations) StateFields() []string { + return []string{ + "fileState", + "cachingInodeOps", + } +} + +func (i *inodeOperations) beforeSave() {} + +// +checklocksignore +func (i *inodeOperations) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.fileState) + stateSinkObject.Save(1, &i.cachingInodeOps) +} + +func (i *inodeOperations) afterLoad() {} + +// +checklocksignore +func (i *inodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &i.fileState) + stateSourceObject.Load(1, &i.cachingInodeOps) +} + +func (i *inodeFileState) StateTypeName() string { + return "pkg/sentry/fs/host.inodeFileState" +} + +func (i *inodeFileState) StateFields() []string { + return []string{ + "descriptor", + "sattr", + "savedUAttr", + } +} + +func (i *inodeFileState) beforeSave() {} + +// +checklocksignore +func (i *inodeFileState) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + if !state.IsZeroValue(&i.queue) { + state.Failf("queue is %#v, expected zero", &i.queue) + } + stateSinkObject.Save(0, &i.descriptor) + stateSinkObject.Save(1, &i.sattr) + stateSinkObject.Save(2, &i.savedUAttr) +} + +// +checklocksignore +func (i *inodeFileState) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &i.descriptor) + stateSourceObject.LoadWait(1, &i.sattr) + stateSourceObject.Load(2, &i.savedUAttr) + stateSourceObject.AfterLoad(i.afterLoad) +} + +func (c *ConnectedEndpoint) StateTypeName() string { + return "pkg/sentry/fs/host.ConnectedEndpoint" +} + +func (c *ConnectedEndpoint) StateFields() []string { + return []string{ + "ref", + "queue", + "path", + "srfd", + "stype", + } +} + +// +checklocksignore +func (c *ConnectedEndpoint) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.ref) + stateSinkObject.Save(1, &c.queue) + stateSinkObject.Save(2, &c.path) + stateSinkObject.Save(3, &c.srfd) + stateSinkObject.Save(4, &c.stype) +} + +// +checklocksignore +func (c *ConnectedEndpoint) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.ref) + stateSourceObject.Load(1, &c.queue) + stateSourceObject.Load(2, &c.path) + stateSourceObject.LoadWait(3, &c.srfd) + stateSourceObject.Load(4, &c.stype) + stateSourceObject.AfterLoad(c.afterLoad) +} + +func (t *TTYFileOperations) StateTypeName() string { + return "pkg/sentry/fs/host.TTYFileOperations" +} + +func (t *TTYFileOperations) StateFields() []string { + return []string{ + "fileOperations", + "session", + "fgProcessGroup", + "termios", + } +} + +func (t *TTYFileOperations) beforeSave() {} + +// +checklocksignore +func (t *TTYFileOperations) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.fileOperations) + stateSinkObject.Save(1, &t.session) + stateSinkObject.Save(2, &t.fgProcessGroup) + stateSinkObject.Save(3, &t.termios) +} + +func (t *TTYFileOperations) afterLoad() {} + +// +checklocksignore +func (t *TTYFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.fileOperations) + stateSourceObject.Load(1, &t.session) + stateSourceObject.Load(2, &t.fgProcessGroup) + stateSourceObject.Load(3, &t.termios) +} + +func init() { + state.Register((*descriptor)(nil)) + state.Register((*fileOperations)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*inodeOperations)(nil)) + state.Register((*inodeFileState)(nil)) + state.Register((*ConnectedEndpoint)(nil)) + state.Register((*TTYFileOperations)(nil)) +} diff --git a/pkg/sentry/fs/host/host_unsafe_state_autogen.go b/pkg/sentry/fs/host/host_unsafe_state_autogen.go new file mode 100644 index 000000000..b2d8c661f --- /dev/null +++ b/pkg/sentry/fs/host/host_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package host diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go deleted file mode 100644 index 11738871b..000000000 --- a/pkg/sentry/fs/host/inode_test.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "testing" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/sentry/contexttest" -) - -// TestCloseFD verifies fds will be closed. -func TestCloseFD(t *testing.T) { - var p [2]int - if err := unix.Pipe(p[0:]); err != nil { - t.Fatalf("Failed to create pipe %v", err) - } - defer unix.Close(p[0]) - defer unix.Close(p[1]) - - // Use the write-end because we will detect if it's closed on the read end. - ctx := contexttest.Context(t) - file, err := NewFile(ctx, p[1]) - if err != nil { - t.Fatalf("Failed to create File: %v", err) - } - file.DecRef(ctx) - - s := make([]byte, 10) - if c, err := unix.Read(p[0], s); c != 0 || err != nil { - t.Errorf("want 0, nil (EOF) from read end, got %v, %v", c, err) - } -} diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go deleted file mode 100644 index f7014b6b1..000000000 --- a/pkg/sentry/fs/host/socket_test.go +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "reflect" - "testing" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -var ( - // Make sure that ConnectedEndpoint implements transport.ConnectedEndpoint. - _ = transport.ConnectedEndpoint(new(ConnectedEndpoint)) - - // Make sure that ConnectedEndpoint implements transport.Receiver. - _ = transport.Receiver(new(ConnectedEndpoint)) -) - -func getFl(fd int) (uint32, error) { - fl, _, err := unix.RawSyscall(unix.SYS_FCNTL, uintptr(fd), unix.F_GETFL, 0) - if err == 0 { - return uint32(fl), nil - } - return 0, err -} - -func TestSocketIsBlocking(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - - fl, err := getFl(pair[0]) - if err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[0], err) - } - if fl&unix.O_NONBLOCK == unix.O_NONBLOCK { - t.Fatalf("Expected socket %v to be blocking", pair[0]) - } - if fl, err = getFl(pair[1]); err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[1], err) - } - if fl&unix.O_NONBLOCK == unix.O_NONBLOCK { - t.Fatalf("Expected socket %v to be blocking", pair[1]) - } - ctx := contexttest.Context(t) - sock, err := newSocket(ctx, pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) failed => %v", pair[0], err) - } - defer sock.DecRef(ctx) - // Test that the socket now is non-blocking. - if fl, err = getFl(pair[0]); err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[0], err) - } - if fl&unix.O_NONBLOCK != unix.O_NONBLOCK { - t.Errorf("Expected socket %v to have become non-blocking", pair[0]) - } - if fl, err = getFl(pair[1]); err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[1], err) - } - if fl&unix.O_NONBLOCK == unix.O_NONBLOCK { - t.Errorf("Did not expect socket %v to become non-blocking", pair[1]) - } -} - -func TestSocketWritev(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - ctx := contexttest.Context(t) - socket, err := newSocket(ctx, pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer socket.DecRef(ctx) - buf := []byte("hello world\n") - n, err := socket.Writev(contexttest.Context(t), usermem.BytesIOSequence(buf)) - if err != nil { - t.Fatalf("socket writev failed: %v", err) - } - - if n != int64(len(buf)) { - t.Fatalf("socket writev wrote incorrect bytes: %d", n) - } -} - -func TestSocketWritevLen0(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - ctx := contexttest.Context(t) - socket, err := newSocket(ctx, pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer socket.DecRef(ctx) - n, err := socket.Writev(contexttest.Context(t), usermem.BytesIOSequence(nil)) - if err != nil { - t.Fatalf("socket writev failed: %v", err) - } - - if n != 0 { - t.Fatalf("socket writev wrote incorrect bytes: %d", n) - } -} - -func TestSocketSendMsgLen0(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - ctx := contexttest.Context(t) - sfile, err := newSocket(ctx, pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer sfile.DecRef(ctx) - - s := sfile.FileOperations.(socket.Socket) - n, terr := s.SendMsg(nil, usermem.BytesIOSequence(nil), []byte{}, 0, false, ktime.Time{}, socket.ControlMessages{}) - if n != 0 { - t.Fatalf("socket sendmsg() failed: %v wrote: %d", terr, n) - } - - if terr != nil { - t.Fatalf("socket sendmsg() failed: %v", terr) - } -} - -func TestListen(t *testing.T) { - pair, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("unix.Socket(unix.AF_UNIX, unix.SOCK_STREAM, 0) => %v", err) - } - ctx := contexttest.Context(t) - sfile1, err := newSocket(ctx, pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer sfile1.DecRef(ctx) - socket1 := sfile1.FileOperations.(socket.Socket) - - sfile2, err := newSocket(ctx, pair[1], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[1], err) - } - defer sfile2.DecRef(ctx) - socket2 := sfile2.FileOperations.(socket.Socket) - - // Socketpairs can not be listened to. - if err := socket1.Listen(nil, 64); err != syserr.ErrInvalidEndpointState { - t.Fatalf("socket1.Listen(nil, 64) => %v, want syserr.ErrInvalidEndpointState", err) - } - if err := socket2.Listen(nil, 64); err != syserr.ErrInvalidEndpointState { - t.Fatalf("socket2.Listen(nil, 64) => %v, want syserr.ErrInvalidEndpointState", err) - } - - // Create a Unix socket, do not bind it. - sock, err := unix.Socket(unix.AF_UNIX, unix.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("unix.Socket(unix.AF_UNIX, unix.SOCK_STREAM, 0) => %v", err) - } - sfile3, err := newSocket(ctx, sock, false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", sock, err) - } - defer sfile3.DecRef(ctx) - socket3 := sfile3.FileOperations.(socket.Socket) - - // This socket is not bound so we can't listen on it. - if err := socket3.Listen(nil, 64); err != syserr.ErrInvalidEndpointState { - t.Fatalf("socket3.Listen(nil, 64) => %v, want syserr.ErrInvalidEndpointState", err) - } -} - -func TestPasscred(t *testing.T) { - e := &ConnectedEndpoint{} - if got, want := e.Passcred(), false; got != want { - t.Errorf("Got %#v.Passcred() = %t, want = %t", e, got, want) - } -} - -func TestGetLocalAddress(t *testing.T) { - e := &ConnectedEndpoint{path: "foo"} - want := tcpip.FullAddress{Addr: tcpip.Address("foo")} - if got, err := e.GetLocalAddress(); err != nil || got != want { - t.Errorf("Got %#v.GetLocalAddress() = %#v, %v, want = %#v, %v", e, got, err, want, nil) - } -} - -func TestQueuedSize(t *testing.T) { - e := &ConnectedEndpoint{} - tests := []struct { - name string - f func() int64 - }{ - {"SendQueuedSize", e.SendQueuedSize}, - {"RecvQueuedSize", e.RecvQueuedSize}, - } - - for _, test := range tests { - if got, want := test.f(), int64(-1); got != want { - t.Errorf("Got %#v.%s() = %d, want = %d", e, test.name, got, want) - } - } -} - -func TestRelease(t *testing.T) { - f, err := unix.Socket(unix.AF_UNIX, unix.SOCK_STREAM|unix.SOCK_NONBLOCK|unix.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c := &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f)} - want := &ConnectedEndpoint{queue: c.queue} - ctx := contexttest.Context(t) - want.ref.DecRef(ctx) - fdnotifier.AddFD(int32(c.file.FD()), nil) - c.Release(ctx) - if !reflect.DeepEqual(c, want) { - t.Errorf("got = %#v, want = %#v", c, want) - } -} diff --git a/pkg/sentry/fs/host/wait_test.go b/pkg/sentry/fs/host/wait_test.go deleted file mode 100644 index bd6188e03..000000000 --- a/pkg/sentry/fs/host/wait_test.go +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "testing" - "time" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestWait(t *testing.T) { - var fds [2]int - err := unix.Pipe(fds[:]) - if err != nil { - t.Fatalf("Unable to create pipe: %v", err) - } - - defer unix.Close(fds[1]) - - ctx := contexttest.Context(t) - file, err := NewFile(ctx, fds[0]) - if err != nil { - unix.Close(fds[0]) - t.Fatalf("NewFile failed: %v", err) - } - - defer file.DecRef(ctx) - - r := file.Readiness(waiter.ReadableEvents) - if r != 0 { - t.Fatalf("File is ready for read when it shouldn't be.") - } - - e, ch := waiter.NewChannelEntry(nil) - file.EventRegister(&e, waiter.ReadableEvents) - defer file.EventUnregister(&e) - - // Check that there are no notifications yet. - if len(ch) != 0 { - t.Fatalf("Channel is non-empty") - } - - // Write to the pipe, so it should be writable now. - unix.Write(fds[1], []byte{1}) - - // Check that we get a notification. We need to yield the current thread - // so that the fdnotifier can deliver notifications, so we use a - // 1-second timeout instead of just checking the length of the channel. - select { - case <-ch: - case <-time.After(1 * time.Second): - t.Fatalf("Channel not notified") - } -} diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go deleted file mode 100644 index a3800d700..000000000 --- a/pkg/sentry/fs/inode_overlay_test.go +++ /dev/null @@ -1,470 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" -) - -func TestLookup(t *testing.T) { - ctx := contexttest.Context(t) - for _, test := range []struct { - // Test description. - desc string - - // Lookup parameters. - dir *fs.Inode - name string - - // Want from lookup. - found bool - hasUpper bool - hasLower bool - }{ - { - desc: "no upper, lower has name", - dir: fs.NewTestOverlayDir(ctx, - nil, /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: false, - hasLower: true, - }, - { - desc: "no lower, upper has name", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* upper */ - nil, /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: false, - }, - { - desc: "upper and lower, only lower has name", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "b", - dir: false, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: false, - hasLower: true, - }, - { - desc: "upper and lower, only upper has name", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "b", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: false, - }, - { - desc: "upper and lower, both have file", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: false, - }, - { - desc: "upper and lower, both have directory", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: true, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: true, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: true, - }, - { - desc: "upper and lower, upper negative masks lower file", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, nil, []string{"a"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: false, - hasUpper: false, - hasLower: false, - }, - { - desc: "upper and lower, upper negative does not mask lower file", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, nil, []string{"b"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: false, - hasLower: true, - }, - } { - t.Run(test.desc, func(t *testing.T) { - dirent, err := test.dir.Lookup(ctx, test.name) - if test.found && (linuxerr.Equals(linuxerr.ENOENT, err) || dirent.IsNegative()) { - t.Fatalf("lookup %q expected to find positive dirent, got dirent %v err %v", test.name, dirent, err) - } - if !test.found { - if !linuxerr.Equals(linuxerr.ENOENT, err) && !dirent.IsNegative() { - t.Errorf("lookup %q expected to return ENOENT or negative dirent, got dirent %v err %v", test.name, dirent, err) - } - // Nothing more to check. - return - } - if hasUpper := dirent.Inode.TestHasUpperFS(); hasUpper != test.hasUpper { - t.Fatalf("lookup got upper filesystem %v, want %v", hasUpper, test.hasUpper) - } - if hasLower := dirent.Inode.TestHasLowerFS(); hasLower != test.hasLower { - t.Errorf("lookup got lower filesystem %v, want %v", hasLower, test.hasLower) - } - }) - } -} - -func TestLookupRevalidation(t *testing.T) { - // File name used in the tests. - fileName := "foofile" - ctx := contexttest.Context(t) - for _, tc := range []struct { - // Test description. - desc string - - // Upper and lower fs for the overlay. - upper *fs.Inode - lower *fs.Inode - - // Whether the upper requires revalidation. - revalidate bool - - // Whether we should get the same dirent on second lookup. - wantSame bool - }{ - { - desc: "file from upper with no revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, nil, nil), - revalidate: false, - wantSame: true, - }, - { - desc: "file from upper with revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, nil, nil), - revalidate: true, - wantSame: false, - }, - { - desc: "file from lower with no revalidation", - upper: newTestRamfsDir(ctx, nil, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: false, - wantSame: true, - }, - { - desc: "file from lower with revalidation", - upper: newTestRamfsDir(ctx, nil, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: true, - // The file does not exist in the upper, so we do not - // need to revalidate it. - wantSame: true, - }, - { - desc: "file from upper and lower with no revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: false, - wantSame: true, - }, - { - desc: "file from upper and lower with revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: true, - wantSame: false, - }, - } { - t.Run(tc.desc, func(t *testing.T) { - root := fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root") - ctx = &rootContext{ - Context: ctx, - root: root, - } - overlay := fs.NewDirent(ctx, fs.NewTestOverlayDir(ctx, tc.upper, tc.lower, tc.revalidate), "overlay") - // Lookup the file twice through the overlay. - first, err := overlay.Walk(ctx, root, fileName) - if err != nil { - t.Fatalf("overlay.Walk(%q) failed: %v", fileName, err) - } - second, err := overlay.Walk(ctx, root, fileName) - if err != nil { - t.Fatalf("overlay.Walk(%q) failed: %v", fileName, err) - } - - if tc.wantSame && first != second { - t.Errorf("dirent lookup got different dirents, wanted same\nfirst=%+v\nsecond=%+v", first, second) - } else if !tc.wantSame && first == second { - t.Errorf("dirent lookup got the same dirent, wanted different: %+v", first) - } - }) - } -} - -func TestCacheFlush(t *testing.T) { - ctx := contexttest.Context(t) - - // Upper and lower each have a file. - upperFileName := "file-from-upper" - lowerFileName := "file-from-lower" - upper := newTestRamfsDir(ctx, []dirContent{{name: upperFileName}}, nil) - lower := newTestRamfsDir(ctx, []dirContent{{name: lowerFileName}}, nil) - - overlay := fs.NewTestOverlayDir(ctx, upper, lower, true /* revalidate */) - - mns, err := fs.NewMountNamespace(ctx, overlay) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - root := mns.Root() - defer root.DecRef(ctx) - - ctx = &rootContext{ - Context: ctx, - root: root, - } - - for _, fileName := range []string{upperFileName, lowerFileName} { - // Walk to the file. - maxTraversals := uint(0) - dirent, err := mns.FindInode(ctx, root, nil, fileName, &maxTraversals) - if err != nil { - t.Fatalf("FindInode(%q) failed: %v", fileName, err) - } - - // Get a file from the dirent. - file, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("GetFile() failed: %v", err) - } - - // The dirent should have 3 refs, one from us, one from the - // file, and one from the dirent cache. - // dirent cache. - if got, want := dirent.ReadRefs(), 3; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - - // Drop the file reference. - file.DecRef(ctx) - - // Dirent should have 2 refs left. - if got, want := dirent.ReadRefs(), 2; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - - // Flush the dirent cache. - mns.FlushMountSourceRefs() - - // Dirent should have 1 ref left from the dirent cache. - if got, want := dirent.ReadRefs(), 1; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - - // Drop our ref. - dirent.DecRef(ctx) - - // We should be back to zero refs. - if got, want := dirent.ReadRefs(), 0; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - } - -} - -type dir struct { - fs.InodeOperations - - // List of negative child names. - negative []string - - // ReaddirCalled records whether Readdir was called on a file - // corresponding to this inode. - ReaddirCalled bool -} - -// GetXattr implements InodeOperations.GetXattr. -func (d *dir) GetXattr(_ context.Context, _ *fs.Inode, name string, _ uint64) (string, error) { - for _, n := range d.negative { - if name == fs.XattrOverlayWhiteout(n) { - return "y", nil - } - } - return "", linuxerr.ENOATTR -} - -// GetFile implements InodeOperations.GetFile. -func (d *dir) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - file, err := d.InodeOperations.GetFile(ctx, dirent, flags) - if err != nil { - return nil, err - } - defer file.DecRef(ctx) - // Wrap the file's FileOperations in a dirFile. - fops := &dirFile{ - FileOperations: file.FileOperations, - inode: d, - } - return fs.NewFile(ctx, dirent, flags, fops), nil -} - -type dirContent struct { - name string - dir bool -} - -type dirFile struct { - fs.FileOperations - inode *dir -} - -type inode struct { - fsutil.InodeGenericChecker `state:"nosave"` - fsutil.InodeNoExtendedAttributes `state:"nosave"` - fsutil.InodeNoopRelease `state:"nosave"` - fsutil.InodeNoopWriteOut `state:"nosave"` - fsutil.InodeNotAllocatable `state:"nosave"` - fsutil.InodeNotDirectory `state:"nosave"` - fsutil.InodeNotMappable `state:"nosave"` - fsutil.InodeNotSocket `state:"nosave"` - fsutil.InodeNotSymlink `state:"nosave"` - fsutil.InodeNotTruncatable `state:"nosave"` - fsutil.InodeNotVirtual `state:"nosave"` - - fsutil.InodeSimpleAttributes - fsutil.InodeStaticFileGetter -} - -// Readdir implements fs.FileOperations.Readdir. It sets the ReaddirCalled -// field on the inode. -func (f *dirFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySerializer) (int64, error) { - f.inode.ReaddirCalled = true - return f.FileOperations.Readdir(ctx, file, ser) -} - -func newTestRamfsInode(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - inode := fs.NewInode(ctx, &inode{ - InodeStaticFileGetter: fsutil.InodeStaticFileGetter{ - Contents: []byte("foobar"), - }, - }, msrc, fs.StableAttr{Type: fs.RegularFile}) - return inode -} - -func newTestRamfsDir(ctx context.Context, contains []dirContent, negative []string) *fs.Inode { - msrc := fs.NewPseudoMountSource(ctx) - contents := make(map[string]*fs.Inode) - for _, c := range contains { - if c.dir { - contents[c.name] = newTestRamfsDir(ctx, nil, nil) - } else { - contents[c.name] = newTestRamfsInode(ctx, msrc) - } - } - dops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermissions{ - User: fs.PermMask{Read: true, Execute: true}, - }) - return fs.NewInode(ctx, &dir{ - InodeOperations: dops, - negative: negative, - }, msrc, fs.StableAttr{Type: fs.Directory}) -} diff --git a/pkg/sentry/fs/lock/BUILD b/pkg/sentry/fs/lock/BUILD deleted file mode 100644 index c09d8463b..000000000 --- a/pkg/sentry/fs/lock/BUILD +++ /dev/null @@ -1,62 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "lock_range", - out = "lock_range.go", - package = "lock", - prefix = "Lock", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - -go_template_instance( - name = "lock_set", - out = "lock_set.go", - consts = { - "minDegree": "3", - }, - package = "lock", - prefix = "Lock", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "LockRange", - "Value": "Lock", - "Functions": "lockSetFunctions", - }, -) - -go_library( - name = "lock", - srcs = [ - "lock.go", - "lock_range.go", - "lock_set.go", - "lock_set_functions.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/log", - "//pkg/sync", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "lock_test", - size = "small", - srcs = [ - "lock_range_test.go", - "lock_test.go", - ], - library = ":lock", - deps = ["@org_golang_x_sys//unix:go_default_library"], -) diff --git a/pkg/sentry/fs/lock/lock_range.go b/pkg/sentry/fs/lock/lock_range.go new file mode 100644 index 000000000..13a2cce6e --- /dev/null +++ b/pkg/sentry/fs/lock/lock_range.go @@ -0,0 +1,76 @@ +package lock + +// A Range represents a contiguous range of T. +// +// +stateify savable +type LockRange struct { + // Start is the inclusive start of the range. + Start uint64 + + // End is the exclusive end of the range. + End uint64 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +// +//go:nosplit +func (r LockRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +// +//go:nosplit +func (r LockRange) Length() uint64 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +// +//go:nosplit +func (r LockRange) Contains(x uint64) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +// +//go:nosplit +func (r LockRange) Overlaps(r2 LockRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +// +//go:nosplit +func (r LockRange) IsSupersetOf(r2 LockRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +// +//go:nosplit +func (r LockRange) Intersect(r2 LockRange) LockRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +// +//go:nosplit +func (r LockRange) CanSplitAt(x uint64) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/fs/lock/lock_range_test.go b/pkg/sentry/fs/lock/lock_range_test.go deleted file mode 100644 index 2b6f8630b..000000000 --- a/pkg/sentry/fs/lock/lock_range_test.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package lock - -import ( - "testing" - - "golang.org/x/sys/unix" -) - -func TestComputeRange(t *testing.T) { - tests := []struct { - // Description of test. - name string - - // Requested start of the lock range. - start int64 - - // Requested length of the lock range, - // can be negative :( - length int64 - - // Pre-computed file offset based on whence. - // Will be added to start. - offset int64 - - // Expected error. - err error - - // If error is nil, the expected LockRange. - LockRange - }{ - { - name: "offset, start, and length all zero", - LockRange: LockRange{Start: 0, End: LockEOF}, - }, - { - name: "zero offset, zero start, positive length", - start: 0, - length: 4096, - offset: 0, - LockRange: LockRange{Start: 0, End: 4096}, - }, - { - name: "zero offset, negative start", - start: -4096, - offset: 0, - err: unix.EINVAL, - }, - { - name: "large offset, negative start, positive length", - start: -2048, - length: 2048, - offset: 4096, - LockRange: LockRange{Start: 2048, End: 4096}, - }, - { - name: "large offset, negative start, zero length", - start: -2048, - length: 0, - offset: 4096, - LockRange: LockRange{Start: 2048, End: LockEOF}, - }, - { - name: "zero offset, zero start, negative length", - start: 0, - length: -4096, - offset: 0, - err: unix.EINVAL, - }, - { - name: "large offset, zero start, negative length", - start: 0, - length: -4096, - offset: 4096, - LockRange: LockRange{Start: 0, End: 4096}, - }, - { - name: "offset, start, and length equal, length is negative", - start: 1024, - length: -1024, - offset: 1024, - LockRange: LockRange{Start: 1024, End: 2048}, - }, - { - name: "offset, start, and length equal, start is negative", - start: -1024, - length: 1024, - offset: 1024, - LockRange: LockRange{Start: 0, End: 1024}, - }, - { - name: "offset, start, and length equal, offset is negative", - start: 1024, - length: 1024, - offset: -1024, - LockRange: LockRange{Start: 0, End: 1024}, - }, - { - name: "offset, start, and length equal, all negative", - start: -1024, - length: -1024, - offset: -1024, - err: unix.EINVAL, - }, - { - name: "offset, start, and length equal, all positive", - start: 1024, - length: 1024, - offset: 1024, - LockRange: LockRange{Start: 2048, End: 3072}, - }, - } - - for _, test := range tests { - rng, err := ComputeRange(test.start, test.length, test.offset) - if err != test.err { - t.Errorf("%s: lockRange(%d, %d, %d) got error %v, want %v", test.name, test.start, test.length, test.offset, err, test.err) - continue - } - if err == nil && rng != test.LockRange { - t.Errorf("%s: lockRange(%d, %d, %d) got LockRange %v, want %v", test.name, test.start, test.length, test.offset, rng, test.LockRange) - } - } -} diff --git a/pkg/sentry/fs/lock/lock_set.go b/pkg/sentry/fs/lock/lock_set.go new file mode 100644 index 000000000..4bc830883 --- /dev/null +++ b/pkg/sentry/fs/lock/lock_set.go @@ -0,0 +1,1643 @@ +package lock + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const LocktrackGaps = 0 + +var _ = uint8(LocktrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type LockdynamicGap [LocktrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *LockdynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *LockdynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + LockminDegree = 3 + + LockmaxDegree = 2 * LockminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type LockSet struct { + root Locknode `state:".(*LockSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *LockSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *LockSet) IsEmptyRange(r LockRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *LockSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *LockSet) SpanRange(r LockRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *LockSet) FirstSegment() LockIterator { + if s.root.nrSegments == 0 { + return LockIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *LockSet) LastSegment() LockIterator { + if s.root.nrSegments == 0 { + return LockIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *LockSet) FirstGap() LockGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return LockGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *LockSet) LastGap() LockGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return LockGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *LockSet) Find(key uint64) (LockIterator, LockGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return LockIterator{n, i}, LockGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return LockIterator{}, LockGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *LockSet) FindSegment(key uint64) LockIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *LockSet) LowerBoundSegment(min uint64) LockIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *LockSet) UpperBoundSegment(max uint64) LockIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *LockSet) FindGap(key uint64) LockGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *LockSet) LowerBoundGap(min uint64) LockGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *LockSet) UpperBoundGap(max uint64) LockGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *LockSet) Add(r LockRange, val Lock) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *LockSet) AddWithoutMerging(r LockRange, val Lock) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *LockSet) Insert(gap LockGapIterator, r LockRange, val Lock) LockIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (lockSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := LocktrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (lockSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (lockSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := LocktrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *LockSet) InsertWithoutMerging(gap LockGapIterator, r LockRange, val Lock) LockIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *LockSet) InsertWithoutMergingUnchecked(gap LockGapIterator, r LockRange, val Lock) LockIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := LocktrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return LockIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *LockSet) Remove(seg LockIterator) LockGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if LocktrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + lockSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if LocktrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(LockGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *LockSet) RemoveAll() { + s.root = Locknode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *LockSet) RemoveRange(r LockRange) LockGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *LockSet) Merge(first, second LockIterator) LockIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *LockSet) MergeUnchecked(first, second LockIterator) LockIterator { + if first.End() == second.Start() { + if mval, ok := (lockSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return LockIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *LockSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *LockSet) MergeRange(r LockRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *LockSet) MergeAdjacent(r LockRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *LockSet) Split(seg LockIterator, split uint64) (LockIterator, LockIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *LockSet) SplitUnchecked(seg LockIterator, split uint64) (LockIterator, LockIterator) { + val1, val2 := (lockSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), LockRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *LockSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *LockSet) Isolate(seg LockIterator, r LockRange) LockIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *LockSet) ApplyContiguous(r LockRange, fn func(seg LockIterator)) LockGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return LockGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return LockGapIterator{} + } + } +} + +// +stateify savable +type Locknode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *Locknode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap LockdynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [LockmaxDegree - 1]LockRange + values [LockmaxDegree - 1]Lock + children [LockmaxDegree]*Locknode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Locknode) firstSegment() LockIterator { + for n.hasChildren { + n = n.children[0] + } + return LockIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Locknode) lastSegment() LockIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return LockIterator{n, n.nrSegments - 1} +} + +func (n *Locknode) prevSibling() *Locknode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *Locknode) nextSibling() *Locknode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *Locknode) rebalanceBeforeInsert(gap LockGapIterator) LockGapIterator { + if n.nrSegments < LockmaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &Locknode{ + nrSegments: LockminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &Locknode{ + nrSegments: LockminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:LockminDegree-1], n.keys[:LockminDegree-1]) + copy(left.values[:LockminDegree-1], n.values[:LockminDegree-1]) + copy(right.keys[:LockminDegree-1], n.keys[LockminDegree:]) + copy(right.values[:LockminDegree-1], n.values[LockminDegree:]) + n.keys[0], n.values[0] = n.keys[LockminDegree-1], n.values[LockminDegree-1] + LockzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:LockminDegree], n.children[:LockminDegree]) + copy(right.children[:LockminDegree], n.children[LockminDegree:]) + LockzeroNodeSlice(n.children[2:]) + for i := 0; i < LockminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if LocktrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < LockminDegree { + return LockGapIterator{left, gap.index} + } + return LockGapIterator{right, gap.index - LockminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[LockminDegree-1], n.values[LockminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &Locknode{ + nrSegments: LockminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:LockminDegree-1], n.keys[LockminDegree:]) + copy(sibling.values[:LockminDegree-1], n.values[LockminDegree:]) + LockzeroValueSlice(n.values[LockminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:LockminDegree], n.children[LockminDegree:]) + LockzeroNodeSlice(n.children[LockminDegree:]) + for i := 0; i < LockminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = LockminDegree - 1 + + if LocktrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < LockminDegree { + return gap + } + return LockGapIterator{sibling, gap.index - LockminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *Locknode) rebalanceAfterRemove(gap LockGapIterator) LockGapIterator { + for { + if n.nrSegments >= LockminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= LockminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + lockSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if LocktrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return LockGapIterator{n, 0} + } + if gap.node == n { + return LockGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= LockminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + lockSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if LocktrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return LockGapIterator{n, n.nrSegments} + } + return LockGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return LockGapIterator{p, gap.index} + } + if gap.node == right { + return LockGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *Locknode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = LockGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + lockSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if LocktrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *Locknode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *Locknode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *Locknode) calculateMaxGapLeaf() uint64 { + max := LockGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (LockGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *Locknode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *Locknode) searchFirstLargeEnoughGap(minSize uint64) LockGapIterator { + if n.maxGap.Get() < minSize { + return LockGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := LockGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *Locknode) searchLastLargeEnoughGap(minSize uint64) LockGapIterator { + if n.maxGap.Get() < minSize { + return LockGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := LockGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type LockIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *Locknode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg LockIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg LockIterator) Range() LockRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg LockIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg LockIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg LockIterator) SetRangeUnchecked(r LockRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg LockIterator) SetRange(r LockRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg LockIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg LockIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg LockIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg LockIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg LockIterator) Value() Lock { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg LockIterator) ValuePtr() *Lock { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg LockIterator) SetValue(val Lock) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg LockIterator) PrevSegment() LockIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return LockIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return LockIterator{} + } + return LocksegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg LockIterator) NextSegment() LockIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return LockIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return LockIterator{} + } + return LocksegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg LockIterator) PrevGap() LockGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return LockGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg LockIterator) NextGap() LockGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return LockGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg LockIterator) PrevNonEmpty() (LockIterator, LockGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return LockIterator{}, gap + } + return gap.PrevSegment(), LockGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg LockIterator) NextNonEmpty() (LockIterator, LockGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return LockIterator{}, gap + } + return gap.NextSegment(), LockGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type LockGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *Locknode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap LockGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap LockGapIterator) Range() LockRange { + return LockRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap LockGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return lockSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap LockGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return lockSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap LockGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap LockGapIterator) PrevSegment() LockIterator { + return LocksegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap LockGapIterator) NextSegment() LockIterator { + return LocksegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap LockGapIterator) PrevGap() LockGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return LockGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap LockGapIterator) NextGap() LockGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return LockGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap LockGapIterator) NextLargeEnoughGap(minSize uint64) LockGapIterator { + if LocktrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap LockGapIterator) nextLargeEnoughGapHelper(minSize uint64) LockGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return LockGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap LockGapIterator) PrevLargeEnoughGap(minSize uint64) LockGapIterator { + if LocktrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap LockGapIterator) prevLargeEnoughGapHelper(minSize uint64) LockGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return LockGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func LocksegmentBeforePosition(n *Locknode, i int) LockIterator { + for i == 0 { + if n.parent == nil { + return LockIterator{} + } + n, i = n.parent, n.parentIndex + } + return LockIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func LocksegmentAfterPosition(n *Locknode, i int) LockIterator { + for i == n.nrSegments { + if n.parent == nil { + return LockIterator{} + } + n, i = n.parent, n.parentIndex + } + return LockIterator{n, i} +} + +func LockzeroValueSlice(slice []Lock) { + + for i := range slice { + lockSetFunctions{}.ClearValue(&slice[i]) + } +} + +func LockzeroNodeSlice(slice []*Locknode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *LockSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *Locknode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *Locknode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if LocktrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type LockSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []Lock +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *LockSet) ExportSortedSlices() *LockSegmentDataSlices { + var sds LockSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *LockSet) ImportSortedSlices(sds *LockSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := LockRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *LockSet) segmentTestCheck(expectedSegments int, segFunc func(int, LockRange, Lock) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *LockSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *LockSet) saveRoot() *LockSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *LockSet) loadRoot(sds *LockSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/lock/lock_state_autogen.go b/pkg/sentry/fs/lock/lock_state_autogen.go new file mode 100644 index 000000000..a0ad18763 --- /dev/null +++ b/pkg/sentry/fs/lock/lock_state_autogen.go @@ -0,0 +1,232 @@ +// automatically generated by stateify. + +package lock + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (o *OwnerInfo) StateTypeName() string { + return "pkg/sentry/fs/lock.OwnerInfo" +} + +func (o *OwnerInfo) StateFields() []string { + return []string{ + "PID", + } +} + +func (o *OwnerInfo) beforeSave() {} + +// +checklocksignore +func (o *OwnerInfo) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.PID) +} + +func (o *OwnerInfo) afterLoad() {} + +// +checklocksignore +func (o *OwnerInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.PID) +} + +func (l *Lock) StateTypeName() string { + return "pkg/sentry/fs/lock.Lock" +} + +func (l *Lock) StateFields() []string { + return []string{ + "Readers", + "Writer", + "WriterInfo", + } +} + +func (l *Lock) beforeSave() {} + +// +checklocksignore +func (l *Lock) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.Readers) + stateSinkObject.Save(1, &l.Writer) + stateSinkObject.Save(2, &l.WriterInfo) +} + +func (l *Lock) afterLoad() {} + +// +checklocksignore +func (l *Lock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.Readers) + stateSourceObject.Load(1, &l.Writer) + stateSourceObject.Load(2, &l.WriterInfo) +} + +func (l *Locks) StateTypeName() string { + return "pkg/sentry/fs/lock.Locks" +} + +func (l *Locks) StateFields() []string { + return []string{ + "locks", + } +} + +func (l *Locks) beforeSave() {} + +// +checklocksignore +func (l *Locks) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + if !state.IsZeroValue(&l.blockedQueue) { + state.Failf("blockedQueue is %#v, expected zero", &l.blockedQueue) + } + stateSinkObject.Save(0, &l.locks) +} + +func (l *Locks) afterLoad() {} + +// +checklocksignore +func (l *Locks) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.locks) +} + +func (r *LockRange) StateTypeName() string { + return "pkg/sentry/fs/lock.LockRange" +} + +func (r *LockRange) StateFields() []string { + return []string{ + "Start", + "End", + } +} + +func (r *LockRange) beforeSave() {} + +// +checklocksignore +func (r *LockRange) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.Start) + stateSinkObject.Save(1, &r.End) +} + +func (r *LockRange) afterLoad() {} + +// +checklocksignore +func (r *LockRange) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.Start) + stateSourceObject.Load(1, &r.End) +} + +func (s *LockSet) StateTypeName() string { + return "pkg/sentry/fs/lock.LockSet" +} + +func (s *LockSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *LockSet) beforeSave() {} + +// +checklocksignore +func (s *LockSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *LockSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *LockSet) afterLoad() {} + +// +checklocksignore +func (s *LockSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*LockSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*LockSegmentDataSlices)) }) +} + +func (n *Locknode) StateTypeName() string { + return "pkg/sentry/fs/lock.Locknode" +} + +func (n *Locknode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *Locknode) beforeSave() {} + +// +checklocksignore +func (n *Locknode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *Locknode) afterLoad() {} + +// +checklocksignore +func (n *Locknode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (l *LockSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/fs/lock.LockSegmentDataSlices" +} + +func (l *LockSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (l *LockSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (l *LockSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.Start) + stateSinkObject.Save(1, &l.End) + stateSinkObject.Save(2, &l.Values) +} + +func (l *LockSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (l *LockSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.Start) + stateSourceObject.Load(1, &l.End) + stateSourceObject.Load(2, &l.Values) +} + +func init() { + state.Register((*OwnerInfo)(nil)) + state.Register((*Lock)(nil)) + state.Register((*Locks)(nil)) + state.Register((*LockRange)(nil)) + state.Register((*LockSet)(nil)) + state.Register((*Locknode)(nil)) + state.Register((*LockSegmentDataSlices)(nil)) +} diff --git a/pkg/sentry/fs/lock/lock_test.go b/pkg/sentry/fs/lock/lock_test.go deleted file mode 100644 index 9878c04e1..000000000 --- a/pkg/sentry/fs/lock/lock_test.go +++ /dev/null @@ -1,1060 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package lock - -import ( - "reflect" - "testing" -) - -type entry struct { - Lock - LockRange -} - -func equals(e0, e1 []entry) bool { - if len(e0) != len(e1) { - return false - } - for i := range e0 { - for k := range e0[i].Lock.Readers { - if _, ok := e1[i].Lock.Readers[k]; !ok { - return false - } - } - for k := range e1[i].Lock.Readers { - if _, ok := e0[i].Lock.Readers[k]; !ok { - return false - } - } - if !reflect.DeepEqual(e0[i].LockRange, e1[i].LockRange) { - return false - } - if e0[i].Lock.Writer != e1[i].Lock.Writer { - return false - } - } - return true -} - -// fill a LockSet with consecutive region locks. Will panic if -// LockRanges are not consecutive. -func fill(entries []entry) LockSet { - l := LockSet{} - for _, e := range entries { - gap := l.FindGap(e.LockRange.Start) - if !gap.Ok() { - panic("cannot insert into existing segment") - } - l.Insert(gap, e.LockRange, e.Lock) - } - return l -} - -func TestCanLockEmpty(t *testing.T) { - l := LockSet{} - - // Expect to be able to take any locks given that the set is empty. - eof := l.FirstGap().End() - r := LockRange{0, eof} - if !l.canLock(1, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 1) - } - if !l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 2) - } - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } - if !l.canLock(2, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 2) - } -} - -func TestCanLock(t *testing.T) { - // + -------------- + ---------- + -------------- + --------- + - // | Readers 1 & 2 | Readers 1 | Readers 1 & 3 | Writer 1 | - // + ------------- + ---------- + -------------- + --------- + - // 0 1024 2048 3072 4096 - l := fill([]entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 3: OwnerInfo{}}}, - LockRange: LockRange{2048, 3072}, - }, - { - Lock: Lock{Writer: 1}, - LockRange: LockRange{3072, 4096}, - }, - }) - - // Now that we have a mildly interesting layout, try some checks on different - // ranges, uids, and lock types. - // - // Expect to be able to extend the read lock, despite the writer lock, because - // the writer has the same uid as the requested read lock. - r := LockRange{0, 8192} - if !l.canLock(1, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 1) - } - // Expect to *not* be able to extend the read lock since there is an overlapping - // writer region locked by someone other than the uid. - if l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got true, want false", ReadLock, r, 2) - } - // Expect to be able to extend the read lock if there are only other readers in - // the way. - r = LockRange{64, 3072} - if !l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 2) - } - // Expect to be able to set a read lock beyond the range of any existing locks. - r = LockRange{4096, 10240} - if !l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 2) - } - - // Expect to not be able to take a write lock with other readers in the way. - r = LockRange{0, 8192} - if l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got true, want false", WriteLock, r, 1) - } - // Expect to be able to extend the write lock for the same uid. - r = LockRange{3072, 8192} - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } - // Expect to not be able to overlap a write lock for two different uids. - if l.canLock(2, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got true, want false", WriteLock, r, 2) - } - // Expect to be able to set a write lock that is beyond the range of any - // existing locks. - r = LockRange{8192, 10240} - if !l.canLock(2, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 2) - } - // Expect to be able to upgrade a read lock (any portion of it). - r = LockRange{1024, 2048} - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } - r = LockRange{1080, 2000} - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } -} - -func TestSetLock(t *testing.T) { - tests := []struct { - // description of test. - name string - - // LockSet entries to pre-fill. - before []entry - - // Description of region to lock: - // - // start is the file offset of the lock. - start uint64 - // end is the end file offset of the lock. - end uint64 - // uid of lock attempter. - uid UniqueID - // lock type requested. - lockType LockType - - // success is true if taking the above - // lock should succeed. - success bool - - // Expected layout of the set after locking - // if success is true. - after []entry - }{ - { - name: "set zero length ReadLock on empty set", - start: 0, - end: 0, - uid: 0, - lockType: ReadLock, - success: true, - }, - { - name: "set zero length WriteLock on empty set", - start: 0, - end: 0, - uid: 0, - lockType: WriteLock, - success: true, - }, - { - name: "set ReadLock on empty set", - start: 0, - end: LockEOF, - uid: 0, - lockType: ReadLock, - success: true, - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "set WriteLock on empty set", - start: 0, - end: LockEOF, - uid: 0, - lockType: WriteLock, - success: true, - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "set ReadLock on WriteLock same uid", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - lockType: ReadLock, - success: true, - // + ----------- + --------------------------- + - // | Readers 0 | Writer 0 | - // + ----------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "set WriteLock on ReadLock same uid", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - lockType: WriteLock, - success: true, - // + ----------- + --------------------------- + - // | Writer 0 | Readers 0 | - // + ----------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "set ReadLock on WriteLock different uid", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: ReadLock, - success: false, - }, - { - name: "set WriteLock on ReadLock different uid", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: WriteLock, - success: false, - }, - { - name: "split ReadLock for overlapping lock at start 0", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: ReadLock, - success: true, - // + -------------- + --------------------------- + - // | Readers 0 & 1 | Readers 0 | - // + -------------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "split ReadLock for overlapping lock at non-zero start", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 4096, - end: 8192, - uid: 1, - lockType: ReadLock, - success: true, - // + ---------- + -------------- + ----------- + - // | Readers 0 | Readers 0 & 1 | Readers 0 | - // + ---------- + -------------- + ----------- + - // 0 4096 8192 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{4096, 8192}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{8192, LockEOF}, - }, - }, - }, - { - name: "fill front gap with ReadLock", - // + --------- + ---------------------------- + - // | gap | Readers 0 | - // + --------- + ---------------------------- + - // 0 1024 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - start: 0, - end: 8192, - uid: 0, - lockType: ReadLock, - success: true, - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "fill end gap with ReadLock", - // + ---------------------------- + - // | Readers 0 | - // + ---------------------------- + - // 0 4096 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, 4096}, - }, - }, - start: 1024, - end: LockEOF, - uid: 0, - lockType: ReadLock, - success: true, - // Note that this is not merged after lock does a Split. This is - // fine because the two locks will still *behave* as one. In other - // words we can fragment any lock all we want and semantically it - // makes no difference. - // - // + ----------- + --------------------------- + - // | Readers 0 | Readers 0 | - // + ----------- + --------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - }, - { - name: "fill gap with ReadLock and split", - // + --------- + ---------------------------- + - // | gap | Readers 0 | - // + --------- + ---------------------------- + - // 0 1024 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: ReadLock, - success: true, - // + --------- + ------------- + ------------- + - // | Reader 1 | Readers 0 & 1 | Reader 0 | - // + ----------+ ------------- + ------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "upgrade ReadLock to WriteLock for single uid fill gap", - // + ------------- + --------- + --- + ------------- + - // | Readers 0 & 1 | Readers 0 | gap | Readers 0 & 2 | - // + ------------- + --------- + --- + ------------- + - // 0 1024 2048 4096 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 1024, - end: 4096, - uid: 0, - lockType: WriteLock, - success: true, - // + ------------- + -------- + ------------- + - // | Readers 0 & 1 | Writer 0 | Readers 0 & 2 | - // + ------------- + -------- + ------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "upgrade ReadLock to WriteLock for single uid keep gap", - // + ------------- + --------- + --- + ------------- + - // | Readers 0 & 1 | Readers 0 | gap | Readers 0 & 2 | - // + ------------- + --------- + --- + ------------- + - // 0 1024 2048 4096 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 1024, - end: 3072, - uid: 0, - lockType: WriteLock, - success: true, - // + ------------- + -------- + --- + ------------- + - // | Readers 0 & 1 | Writer 0 | gap | Readers 0 & 2 | - // + ------------- + -------- + --- + ------------- + - // 0 1024 3072 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{1024, 3072}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "fail to upgrade ReadLock to WriteLock with conflicting Reader", - // + ------------- + --------- + - // | Readers 0 & 1 | Readers 0 | - // + ------------- + --------- + - // 0 1024 2048 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{1024, 2048}, - }, - }, - start: 0, - end: 2048, - uid: 0, - lockType: WriteLock, - success: false, - }, - { - name: "take WriteLock on whole file if all uids are the same", - // + ------------- + --------- + --------- + ---------- + - // | Writer 0 | Readers 0 | Readers 0 | Readers 0 | - // + ------------- + --------- + --------- + ---------- + - // 0 1024 2048 4096 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{2048, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - lockType: WriteLock, - success: true, - // We do not manually merge locks. Semantically a fragmented lock - // held by the same uid will behave as one lock so it makes no difference. - // - // + ------------- + ---------------------------- + - // | Writer 0 | Writer 0 | - // + ------------- + ---------------------------- + - // 0 1024 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - l := fill(test.before) - - r := LockRange{Start: test.start, End: test.end} - success := l.lock(test.uid, 0 /* ownerPID */, test.lockType, r) - var got []entry - for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - got = append(got, entry{ - Lock: seg.Value(), - LockRange: seg.Range(), - }) - } - - if success != test.success { - t.Errorf("setlock(%v, %+v, %d, %d) got success %v, want %v", test.before, r, test.uid, test.lockType, success, test.success) - return - } - - if success { - if !equals(got, test.after) { - t.Errorf("got set %+v, want %+v", got, test.after) - } - } - }) - } -} - -func TestUnlock(t *testing.T) { - tests := []struct { - // description of test. - name string - - // LockSet entries to pre-fill. - before []entry - - // Description of region to unlock: - // - // start is the file start of the lock. - start uint64 - // end is the end file start of the lock. - end uint64 - // uid of lock holder. - uid UniqueID - - // Expected layout of the set after unlocking. - after []entry - }{ - { - name: "unlock zero length on empty set", - start: 0, - end: 0, - uid: 0, - }, - { - name: "unlock on empty set (no-op)", - start: 0, - end: LockEOF, - uid: 0, - }, - { - name: "unlock uid not locked (no-op)", - // + --------------------------- + - // | Readers 1 & 2 | - // + --------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 1024, - end: 4096, - uid: 0, - // + --------------------------- + - // | Readers 1 & 2 | - // + --------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "unlock ReadLock over entire file", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - }, - { - name: "unlock WriteLock over entire file", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - }, - { - name: "unlock partial ReadLock (start)", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - // + ------ + --------------------------- + - // | gap | Readers 0 | - // +------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock partial WriteLock (start)", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - // + ------ + --------------------------- + - // | gap | Writer 0 | - // +------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock partial ReadLock (end)", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 4096, - end: LockEOF, - uid: 0, - // + --------------------------- + - // | Readers 0 | - // +---------------------------- + - // 0 4096 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}}}, - LockRange: LockRange{0, 4096}, - }, - }, - }, - { - name: "unlock partial WriteLock (end)", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 4096, - end: LockEOF, - uid: 0, - // + --------------------------- + - // | Writer 0 | - // +---------------------------- + - // 0 4096 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 4096}, - }, - }, - }, - { - name: "unlock for single uid", - // + ------------- + --------- + ------------------- + - // | Readers 0 & 1 | Writer 0 | Readers 0 & 1 & 2 | - // + ------------- + --------- + ------------------- + - // 0 1024 4096 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - // + --------- + --- + --------------- + - // | Readers 1 | gap | Readers 1 & 2 | - // + --------- + --- + --------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock subsection locked", - // + ------------------------------- + - // | Readers 0 & 1 & 2 | - // + ------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 1024, - end: 4096, - uid: 0, - // + ----------------- + ------------- + ----------------- + - // | Readers 0 & 1 & 2 | Readers 1 & 2 | Readers 0 & 1 & 2 | - // + ----------------- + ------------- + ----------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}, 2: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock mid-gap to increase gap", - // + --------- + ----- + ------------------- + - // | Writer 0 | gap | Readers 0 & 1 | - // + --------- + ----- + ------------------- + - // 0 1024 4096 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 8, - end: 2048, - uid: 0, - // + --------- + ----- + ------------------- + - // | Writer 0 | gap | Readers 0 & 1 | - // + --------- + ----- + ------------------- + - // 0 8 4096 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 8}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock split region on uid mid-gap", - // + --------- + ----- + ------------------- + - // | Writer 0 | gap | Readers 0 & 1 | - // + --------- + ----- + ------------------- + - // 0 1024 4096 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 2048, - end: 8192, - uid: 0, - // + --------- + ----- + --------- + ------------- + - // | Writer 0 | gap | Readers 1 | Readers 0 & 1 | - // + --------- + ----- + --------- + ------------- + - // 0 1024 4096 8192 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{1: OwnerInfo{}}}, - LockRange: LockRange{4096, 8192}, - }, - { - Lock: Lock{Readers: map[UniqueID]OwnerInfo{0: OwnerInfo{}, 1: OwnerInfo{}}}, - LockRange: LockRange{8192, LockEOF}, - }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - l := fill(test.before) - - r := LockRange{Start: test.start, End: test.end} - l.unlock(test.uid, r) - var got []entry - for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - got = append(got, entry{ - Lock: seg.Value(), - LockRange: seg.Range(), - }) - } - if !equals(got, test.after) { - t.Errorf("got set %+v, want %+v", got, test.after) - } - }) - } -} diff --git a/pkg/sentry/fs/mount_test.go b/pkg/sentry/fs/mount_test.go deleted file mode 100644 index 6c296f5d0..000000000 --- a/pkg/sentry/fs/mount_test.go +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "fmt" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" -) - -// cacheReallyContains iterates through the dirent cache to determine whether -// it contains the given dirent. -func cacheReallyContains(cache *DirentCache, d *Dirent) bool { - for i := cache.list.Front(); i != nil; i = i.Next() { - if i == d { - return true - } - } - return false -} - -func mountPathsAre(ctx context.Context, root *Dirent, got []*Mount, want ...string) error { - gotPaths := make(map[string]struct{}, len(got)) - gotStr := make([]string, len(got)) - for i, g := range got { - if groot := g.Root(); groot != nil { - name, _ := groot.FullName(root) - groot.DecRef(ctx) - gotStr[i] = name - gotPaths[name] = struct{}{} - } - } - if len(got) != len(want) { - return fmt.Errorf("mount paths are different, got: %q, want: %q", gotStr, want) - } - for _, w := range want { - if _, ok := gotPaths[w]; !ok { - return fmt.Errorf("no mount with path %q found", w) - } - } - return nil -} - -// TestMountSourceOnlyCachedOnce tests that a Dirent that is mounted over only ends -// up in a single Dirent Cache. NOTE(b/63848693): Having a dirent in multiple -// caches causes major consistency issues. -func TestMountSourceOnlyCachedOnce(t *testing.T) { - ctx := contexttest.Context(t) - - rootCache := NewDirentCache(100) - rootInode := NewMockInode(ctx, NewMockMountSource(rootCache), StableAttr{ - Type: Directory, - }) - mm, err := NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - rootDirent := mm.Root() - defer rootDirent.DecRef(ctx) - - // Get a child of the root which we will mount over. Note that the - // MockInodeOperations causes Walk to always succeed. - child, err := rootDirent.Walk(ctx, rootDirent, "child") - if err != nil { - t.Fatalf("failed to walk to child dirent: %v", err) - } - child.maybeExtendReference() // Cache. - - // Ensure that the root cache contains the child. - if !cacheReallyContains(rootCache, child) { - t.Errorf("wanted rootCache to contain child dirent, but it did not") - } - - // Create a new cache and inode, and mount it over child. - submountCache := NewDirentCache(100) - submountInode := NewMockInode(ctx, NewMockMountSource(submountCache), StableAttr{ - Type: Directory, - }) - if err := mm.Mount(ctx, child, submountInode); err != nil { - t.Fatalf("failed to mount over child: %v", err) - } - - // Walk to the child again. - child2, err := rootDirent.Walk(ctx, rootDirent, "child") - if err != nil { - t.Fatalf("failed to walk to child dirent: %v", err) - } - - // Should have a different Dirent than before. - if child == child2 { - t.Fatalf("expected %v not equal to %v, but they are the same", child, child2) - } - - // Neither of the caches should no contain the child. - if cacheReallyContains(rootCache, child) { - t.Errorf("wanted rootCache not to contain child dirent, but it did") - } - if cacheReallyContains(submountCache, child) { - t.Errorf("wanted submountCache not to contain child dirent, but it did") - } -} - -func TestAllMountsUnder(t *testing.T) { - ctx := contexttest.Context(t) - - rootCache := NewDirentCache(100) - rootInode := NewMockInode(ctx, NewMockMountSource(rootCache), StableAttr{ - Type: Directory, - }) - mm, err := NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - rootDirent := mm.Root() - defer rootDirent.DecRef(ctx) - - // Add mounts at the following paths: - paths := []string{ - "/foo", - "/foo/bar", - "/foo/bar/baz", - "/foo/qux", - "/waldo", - } - - var maxTraversals uint - for _, p := range paths { - maxTraversals = 0 - d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", p, err) - } - - submountInode := NewMockInode(ctx, NewMockMountSource(nil), StableAttr{ - Type: Directory, - }) - if err := mm.Mount(ctx, d, submountInode); err != nil { - t.Fatalf("could not mount at %q: %v", p, err) - } - d.DecRef(ctx) - } - - // mm root should contain all submounts (and does not include the root mount). - rootMnt := mm.FindMount(rootDirent) - submounts := mm.AllMountsUnder(rootMnt) - allPaths := append(paths, "/") - if err := mountPathsAre(ctx, rootDirent, submounts, allPaths...); err != nil { - t.Error(err) - } - - // Each mount should have a unique ID. - foundIDs := make(map[uint64]struct{}) - for _, m := range submounts { - if _, ok := foundIDs[m.ID]; ok { - t.Errorf("got multiple mounts with id %d", m.ID) - } - foundIDs[m.ID] = struct{}{} - } - - // Root mount should have no parent. - if p := rootMnt.ParentID; p != invalidMountID { - t.Errorf("root.Parent got %v wanted nil", p) - } - - // Check that "foo" mount has 3 children. - maxTraversals = 0 - d, err := mm.FindLink(ctx, rootDirent, nil, "/foo", &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", "/foo", err) - } - defer d.DecRef(ctx) - submounts = mm.AllMountsUnder(mm.FindMount(d)) - if err := mountPathsAre(ctx, rootDirent, submounts, "/foo", "/foo/bar", "/foo/qux", "/foo/bar/baz"); err != nil { - t.Error(err) - } - - // "waldo" mount should have no children. - maxTraversals = 0 - waldo, err := mm.FindLink(ctx, rootDirent, nil, "/waldo", &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", "/waldo", err) - } - defer waldo.DecRef(ctx) - submounts = mm.AllMountsUnder(mm.FindMount(waldo)) - if err := mountPathsAre(ctx, rootDirent, submounts, "/waldo"); err != nil { - t.Error(err) - } -} - -func TestUnmount(t *testing.T) { - ctx := contexttest.Context(t) - - rootCache := NewDirentCache(100) - rootInode := NewMockInode(ctx, NewMockMountSource(rootCache), StableAttr{ - Type: Directory, - }) - mm, err := NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - rootDirent := mm.Root() - defer rootDirent.DecRef(ctx) - - // Add mounts at the following paths: - paths := []string{ - "/foo", - "/foo/bar", - "/foo/bar/goo", - "/foo/bar/goo/abc", - "/foo/abc", - "/foo/def", - "/waldo", - "/wally", - } - - var maxTraversals uint - for _, p := range paths { - maxTraversals = 0 - d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", p, err) - } - - submountInode := NewMockInode(ctx, NewMockMountSource(nil), StableAttr{ - Type: Directory, - }) - if err := mm.Mount(ctx, d, submountInode); err != nil { - t.Fatalf("could not mount at %q: %v", p, err) - } - d.DecRef(ctx) - } - - allPaths := make([]string, len(paths)+1) - allPaths[0] = "/" - copy(allPaths[1:], paths) - - rootMnt := mm.FindMount(rootDirent) - for i := len(paths) - 1; i >= 0; i-- { - maxTraversals = 0 - p := paths[i] - d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", p, err) - } - - if err := mm.Unmount(ctx, d, false); err != nil { - t.Fatalf("could not unmount at %q: %v", p, err) - } - d.DecRef(ctx) - - // Remove the path that has been unmounted and the check that the remaining - // mounts are still there. - allPaths = allPaths[:len(allPaths)-1] - submounts := mm.AllMountsUnder(rootMnt) - if err := mountPathsAre(ctx, rootDirent, submounts, allPaths...); err != nil { - t.Error(err) - } - } -} diff --git a/pkg/sentry/fs/mounts_test.go b/pkg/sentry/fs/mounts_test.go deleted file mode 100644 index 975d6cbc9..000000000 --- a/pkg/sentry/fs/mounts_test.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" -) - -// Creates a new MountNamespace with filesystem: -// / (root dir) -// |-foo (dir) -// |-bar (file) -func createMountNamespace(ctx context.Context) (*fs.MountNamespace, error) { - perms := fs.FilePermsFromMode(0777) - m := fs.NewPseudoMountSource(ctx) - - barFile := fsutil.NewSimpleFileInode(ctx, fs.RootOwner, perms, 0) - fooDir := ramfs.NewDir(ctx, map[string]*fs.Inode{ - "bar": fs.NewInode(ctx, barFile, m, fs.StableAttr{Type: fs.RegularFile}), - }, fs.RootOwner, perms) - rootDir := ramfs.NewDir(ctx, map[string]*fs.Inode{ - "foo": fs.NewInode(ctx, fooDir, m, fs.StableAttr{Type: fs.Directory}), - }, fs.RootOwner, perms) - - return fs.NewMountNamespace(ctx, fs.NewInode(ctx, rootDir, m, fs.StableAttr{Type: fs.Directory})) -} - -func TestFindLink(t *testing.T) { - ctx := contexttest.Context(t) - mm, err := createMountNamespace(ctx) - if err != nil { - t.Fatalf("createMountNamespace failed: %v", err) - } - - root := mm.Root() - defer root.DecRef(ctx) - foo, err := root.Walk(ctx, root, "foo") - if err != nil { - t.Fatalf("Error walking to foo: %v", err) - } - - // Positive cases. - for _, tc := range []struct { - findPath string - wd *fs.Dirent - wantPath string - }{ - {".", root, "/"}, - {".", foo, "/foo"}, - {"..", foo, "/"}, - {"../../..", foo, "/"}, - {"///foo", foo, "/foo"}, - {"/foo", foo, "/foo"}, - {"/foo/bar", foo, "/foo/bar"}, - {"/foo/.///./bar", foo, "/foo/bar"}, - {"/foo///bar", foo, "/foo/bar"}, - {"/foo/../foo/bar", foo, "/foo/bar"}, - {"foo/bar", root, "/foo/bar"}, - {"foo////bar", root, "/foo/bar"}, - {"bar", foo, "/foo/bar"}, - } { - wdPath, _ := tc.wd.FullName(root) - maxTraversals := uint(0) - if d, err := mm.FindLink(ctx, root, tc.wd, tc.findPath, &maxTraversals); err != nil { - t.Errorf("FindLink(%q, wd=%q) failed: %v", tc.findPath, wdPath, err) - } else if got, _ := d.FullName(root); got != tc.wantPath { - t.Errorf("FindLink(%q, wd=%q) got dirent %q, want %q", tc.findPath, wdPath, got, tc.wantPath) - } - } - - // Negative cases. - for _, tc := range []struct { - findPath string - wd *fs.Dirent - }{ - {"bar", root}, - {"/bar", root}, - {"/foo/../../bar", root}, - {"foo", foo}, - } { - wdPath, _ := tc.wd.FullName(root) - maxTraversals := uint(0) - if _, err := mm.FindLink(ctx, root, tc.wd, tc.findPath, &maxTraversals); err == nil { - t.Errorf("FindLink(%q, wd=%q) did not return error", tc.findPath, wdPath) - } - } -} diff --git a/pkg/sentry/fs/path_test.go b/pkg/sentry/fs/path_test.go deleted file mode 100644 index e6f57ebba..000000000 --- a/pkg/sentry/fs/path_test.go +++ /dev/null @@ -1,289 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "testing" -) - -// TestSplitLast tests variants of path splitting. -func TestSplitLast(t *testing.T) { - cases := []struct { - path string - dir string - file string - }{ - {path: "/", dir: "/", file: "."}, - {path: "/.", dir: "/", file: "."}, - {path: "/./", dir: "/", file: "."}, - {path: "/./.", dir: "/.", file: "."}, - {path: "/././", dir: "/.", file: "."}, - {path: "/./..", dir: "/.", file: ".."}, - {path: "/./../", dir: "/.", file: ".."}, - {path: "/..", dir: "/", file: ".."}, - {path: "/../", dir: "/", file: ".."}, - {path: "/../.", dir: "/..", file: "."}, - {path: "/.././", dir: "/..", file: "."}, - {path: "/../..", dir: "/..", file: ".."}, - {path: "/../../", dir: "/..", file: ".."}, - - {path: "", dir: ".", file: "."}, - {path: ".", dir: ".", file: "."}, - {path: "./", dir: ".", file: "."}, - {path: "./.", dir: ".", file: "."}, - {path: "././", dir: ".", file: "."}, - {path: "./..", dir: ".", file: ".."}, - {path: "./../", dir: ".", file: ".."}, - {path: "..", dir: ".", file: ".."}, - {path: "../", dir: ".", file: ".."}, - {path: "../.", dir: "..", file: "."}, - {path: ".././", dir: "..", file: "."}, - {path: "../..", dir: "..", file: ".."}, - {path: "../../", dir: "..", file: ".."}, - - {path: "/foo", dir: "/", file: "foo"}, - {path: "/foo/", dir: "/", file: "foo"}, - {path: "/foo/.", dir: "/foo", file: "."}, - {path: "/foo/./", dir: "/foo", file: "."}, - {path: "/foo/./.", dir: "/foo/.", file: "."}, - {path: "/foo/./..", dir: "/foo/.", file: ".."}, - {path: "/foo/..", dir: "/foo", file: ".."}, - {path: "/foo/../", dir: "/foo", file: ".."}, - {path: "/foo/../.", dir: "/foo/..", file: "."}, - {path: "/foo/../..", dir: "/foo/..", file: ".."}, - - {path: "/foo/bar", dir: "/foo", file: "bar"}, - {path: "/foo/bar/", dir: "/foo", file: "bar"}, - {path: "/foo/bar/.", dir: "/foo/bar", file: "."}, - {path: "/foo/bar/./", dir: "/foo/bar", file: "."}, - {path: "/foo/bar/./.", dir: "/foo/bar/.", file: "."}, - {path: "/foo/bar/./..", dir: "/foo/bar/.", file: ".."}, - {path: "/foo/bar/..", dir: "/foo/bar", file: ".."}, - {path: "/foo/bar/../", dir: "/foo/bar", file: ".."}, - {path: "/foo/bar/../.", dir: "/foo/bar/..", file: "."}, - {path: "/foo/bar/../..", dir: "/foo/bar/..", file: ".."}, - - {path: "foo", dir: ".", file: "foo"}, - {path: "foo", dir: ".", file: "foo"}, - {path: "foo/", dir: ".", file: "foo"}, - {path: "foo/.", dir: "foo", file: "."}, - {path: "foo/./", dir: "foo", file: "."}, - {path: "foo/./.", dir: "foo/.", file: "."}, - {path: "foo/./..", dir: "foo/.", file: ".."}, - {path: "foo/..", dir: "foo", file: ".."}, - {path: "foo/../", dir: "foo", file: ".."}, - {path: "foo/../.", dir: "foo/..", file: "."}, - {path: "foo/../..", dir: "foo/..", file: ".."}, - {path: "foo/", dir: ".", file: "foo"}, - {path: "foo/.", dir: "foo", file: "."}, - - {path: "foo/bar", dir: "foo", file: "bar"}, - {path: "foo/bar/", dir: "foo", file: "bar"}, - {path: "foo/bar/.", dir: "foo/bar", file: "."}, - {path: "foo/bar/./", dir: "foo/bar", file: "."}, - {path: "foo/bar/./.", dir: "foo/bar/.", file: "."}, - {path: "foo/bar/./..", dir: "foo/bar/.", file: ".."}, - {path: "foo/bar/..", dir: "foo/bar", file: ".."}, - {path: "foo/bar/../", dir: "foo/bar", file: ".."}, - {path: "foo/bar/../.", dir: "foo/bar/..", file: "."}, - {path: "foo/bar/../..", dir: "foo/bar/..", file: ".."}, - {path: "foo/bar/", dir: "foo", file: "bar"}, - {path: "foo/bar/.", dir: "foo/bar", file: "."}, - } - - for _, c := range cases { - dir, file := SplitLast(c.path) - if dir != c.dir || file != c.file { - t.Errorf("SplitLast(%q) got (%q, %q), expected (%q, %q)", c.path, dir, file, c.dir, c.file) - } - } -} - -// TestSplitFirst tests variants of path splitting. -func TestSplitFirst(t *testing.T) { - cases := []struct { - path string - first string - remainder string - }{ - {path: "/", first: "/", remainder: ""}, - {path: "/.", first: "/", remainder: "."}, - {path: "///.", first: "/", remainder: "//."}, - {path: "/.///", first: "/", remainder: "."}, - {path: "/./.", first: "/", remainder: "./."}, - {path: "/././", first: "/", remainder: "./."}, - {path: "/./..", first: "/", remainder: "./.."}, - {path: "/./../", first: "/", remainder: "./.."}, - {path: "/..", first: "/", remainder: ".."}, - {path: "/../", first: "/", remainder: ".."}, - {path: "/../.", first: "/", remainder: "../."}, - {path: "/.././", first: "/", remainder: "../."}, - {path: "/../..", first: "/", remainder: "../.."}, - {path: "/../../", first: "/", remainder: "../.."}, - - {path: "", first: ".", remainder: ""}, - {path: ".", first: ".", remainder: ""}, - {path: "./", first: ".", remainder: ""}, - {path: ".///", first: ".", remainder: ""}, - {path: "./.", first: ".", remainder: "."}, - {path: "././", first: ".", remainder: "."}, - {path: "./..", first: ".", remainder: ".."}, - {path: "./../", first: ".", remainder: ".."}, - {path: "..", first: "..", remainder: ""}, - {path: "../", first: "..", remainder: ""}, - {path: "../.", first: "..", remainder: "."}, - {path: ".././", first: "..", remainder: "."}, - {path: "../..", first: "..", remainder: ".."}, - {path: "../../", first: "..", remainder: ".."}, - - {path: "/foo", first: "/", remainder: "foo"}, - {path: "/foo/", first: "/", remainder: "foo"}, - {path: "/foo///", first: "/", remainder: "foo"}, - {path: "/foo/.", first: "/", remainder: "foo/."}, - {path: "/foo/./", first: "/", remainder: "foo/."}, - {path: "/foo/./.", first: "/", remainder: "foo/./."}, - {path: "/foo/./..", first: "/", remainder: "foo/./.."}, - {path: "/foo/..", first: "/", remainder: "foo/.."}, - {path: "/foo/../", first: "/", remainder: "foo/.."}, - {path: "/foo/../.", first: "/", remainder: "foo/../."}, - {path: "/foo/../..", first: "/", remainder: "foo/../.."}, - - {path: "/foo/bar", first: "/", remainder: "foo/bar"}, - {path: "///foo/bar", first: "/", remainder: "//foo/bar"}, - {path: "/foo///bar", first: "/", remainder: "foo///bar"}, - {path: "/foo/bar/.", first: "/", remainder: "foo/bar/."}, - {path: "/foo/bar/./", first: "/", remainder: "foo/bar/."}, - {path: "/foo/bar/./.", first: "/", remainder: "foo/bar/./."}, - {path: "/foo/bar/./..", first: "/", remainder: "foo/bar/./.."}, - {path: "/foo/bar/..", first: "/", remainder: "foo/bar/.."}, - {path: "/foo/bar/../", first: "/", remainder: "foo/bar/.."}, - {path: "/foo/bar/../.", first: "/", remainder: "foo/bar/../."}, - {path: "/foo/bar/../..", first: "/", remainder: "foo/bar/../.."}, - - {path: "foo", first: "foo", remainder: ""}, - {path: "foo", first: "foo", remainder: ""}, - {path: "foo/", first: "foo", remainder: ""}, - {path: "foo///", first: "foo", remainder: ""}, - {path: "foo/.", first: "foo", remainder: "."}, - {path: "foo/./", first: "foo", remainder: "."}, - {path: "foo/./.", first: "foo", remainder: "./."}, - {path: "foo/./..", first: "foo", remainder: "./.."}, - {path: "foo/..", first: "foo", remainder: ".."}, - {path: "foo/../", first: "foo", remainder: ".."}, - {path: "foo/../.", first: "foo", remainder: "../."}, - {path: "foo/../..", first: "foo", remainder: "../.."}, - {path: "foo/", first: "foo", remainder: ""}, - {path: "foo/.", first: "foo", remainder: "."}, - - {path: "foo/bar", first: "foo", remainder: "bar"}, - {path: "foo///bar", first: "foo", remainder: "bar"}, - {path: "foo/bar/", first: "foo", remainder: "bar"}, - {path: "foo/bar/.", first: "foo", remainder: "bar/."}, - {path: "foo/bar/./", first: "foo", remainder: "bar/."}, - {path: "foo/bar/./.", first: "foo", remainder: "bar/./."}, - {path: "foo/bar/./..", first: "foo", remainder: "bar/./.."}, - {path: "foo/bar/..", first: "foo", remainder: "bar/.."}, - {path: "foo/bar/../", first: "foo", remainder: "bar/.."}, - {path: "foo/bar/../.", first: "foo", remainder: "bar/../."}, - {path: "foo/bar/../..", first: "foo", remainder: "bar/../.."}, - {path: "foo/bar/", first: "foo", remainder: "bar"}, - {path: "foo/bar/.", first: "foo", remainder: "bar/."}, - } - - for _, c := range cases { - first, remainder := SplitFirst(c.path) - if first != c.first || remainder != c.remainder { - t.Errorf("SplitFirst(%q) got (%q, %q), expected (%q, %q)", c.path, first, remainder, c.first, c.remainder) - } - } -} - -// TestIsSubpath tests the IsSubpath method. -func TestIsSubpath(t *testing.T) { - tcs := []struct { - // Two absolute paths. - pathA string - pathB string - - // Whether pathA is a subpath of pathB. - wantIsSubpath bool - - // Relative path from pathA to pathB. Only checked if - // wantIsSubpath is true. - wantRelpath string - }{ - { - pathA: "/foo/bar/baz", - pathB: "/foo", - wantIsSubpath: true, - wantRelpath: "bar/baz", - }, - { - pathA: "/foo", - pathB: "/foo/bar/baz", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/foo", - wantIsSubpath: false, - }, - { - pathA: "/foobar", - pathB: "/foo", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/foobar", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/foobar", - wantIsSubpath: false, - }, - { - pathA: "/", - pathB: "/foo", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/", - wantIsSubpath: true, - wantRelpath: "foo", - }, - { - pathA: "/foo/bar/../bar", - pathB: "/foo", - wantIsSubpath: true, - wantRelpath: "bar", - }, - { - pathA: "/foo/bar", - pathB: "/foo/../foo", - wantIsSubpath: true, - wantRelpath: "bar", - }, - } - - for _, tc := range tcs { - gotRelpath, gotIsSubpath := IsSubpath(tc.pathA, tc.pathB) - if gotRelpath != tc.wantRelpath || gotIsSubpath != tc.wantIsSubpath { - t.Errorf("IsSubpath(%q, %q) got %q %t, want %q %t", tc.pathA, tc.pathB, gotRelpath, gotIsSubpath, tc.wantRelpath, tc.wantIsSubpath) - } - } -} diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD deleted file mode 100644 index e6d74b949..000000000 --- a/pkg/sentry/fs/proc/BUILD +++ /dev/null @@ -1,75 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "proc", - srcs = [ - "cgroup.go", - "cpuinfo.go", - "exec_args.go", - "fds.go", - "filesystems.go", - "fs.go", - "inode.go", - "loadavg.go", - "meminfo.go", - "mounts.go", - "net.go", - "proc.go", - "stat.go", - "sys.go", - "sys_net.go", - "sys_net_state.go", - "task.go", - "uid_gid_map.go", - "uptime.go", - "version.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/proc/device", - "//pkg/sentry/fs/proc/seqfile", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/fsbridge", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/mm", - "//pkg/sentry/socket", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/sync", - "//pkg/syserror", - "//pkg/tcpip/header", - "//pkg/tcpip/network/ipv4", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "proc_test", - size = "small", - srcs = [ - "net_test.go", - "sys_net_test.go", - ], - library = ":proc", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/inet", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/proc/README.md b/pkg/sentry/fs/proc/README.md deleted file mode 100644 index 6667a0916..000000000 --- a/pkg/sentry/fs/proc/README.md +++ /dev/null @@ -1,336 +0,0 @@ -This document tracks what is implemented in procfs. Refer to -Documentation/filesystems/proc.txt in the Linux project for information about -procfs generally. - -**NOTE**: This document is not guaranteed to be up to date. If you find an -inconsistency, please file a bug. - -[TOC] - -## Kernel data - -The following files are implemented: - -<!-- mdformat off(don't wrap the table) --> - -| File /proc/ | Content | -| :------------------------ | :---------------------------------------------------- | -| [cpuinfo](#cpuinfo) | Info about the CPU | -| [filesystems](#filesystems) | Supported filesystems | -| [loadavg](#loadavg) | Load average of last 1, 5 & 15 minutes | -| [meminfo](#meminfo) | Overall memory info | -| [stat](#stat) | Overall kernel statistics | -| [sys](#sys) | Change parameters within the kernel | -| [uptime](#uptime) | Wall clock since boot, combined idle time of all cpus | -| [version](#version) | Kernel version | - -<!-- mdformat on --> - -### cpuinfo - -```bash -$ cat /proc/cpuinfo -processor : 0 -vendor_id : GenuineIntel -cpu family : 6 -model : 45 -model name : unknown -stepping : unknown -cpu MHz : 1234.588 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx xsaveopt -bogomips : 1234.59 -clflush size : 64 -cache_alignment : 64 -address sizes : 46 bits physical, 48 bits virtual -power management: - -... -``` - -Notable divergences: - -Field name | Notes -:--------------- | :--------------------------------------- -model name | Always unknown -stepping | Always unknown -fpu | Always yes -fpu_exception | Always yes -wp | Always yes -bogomips | Bogus value (matches cpu MHz) -clflush size | Always 64 -cache_alignment | Always 64 -address sizes | Always 46 bits physical, 48 bits virtual -power management | Always blank - -Otherwise fields are derived from the sentry configuration. - -### filesystems - -```bash -$ cat /proc/filesystems -nodev 9p -nodev devpts -nodev devtmpfs -nodev proc -nodev sysfs -nodev tmpfs -``` - -### loadavg - -```bash -$ cat /proc/loadavg -0.00 0.00 0.00 0/0 0 -``` - -Column | Notes -:------------------------------------ | :---------- -CPU.IO utilization in last 1 minute | Always zero -CPU.IO utilization in last 5 minutes | Always zero -CPU.IO utilization in last 10 minutes | Always zero -Num currently running processes | Always zero -Total num processes | Always zero - -TODO(b/62345059): Populate the columns with accurate statistics. - -### meminfo - -```bash -$ cat /proc/meminfo -MemTotal: 2097152 kB -MemFree: 2083540 kB -MemAvailable: 2083540 kB -Buffers: 0 kB -Cached: 4428 kB -SwapCache: 0 kB -Active: 10812 kB -Inactive: 2216 kB -Active(anon): 8600 kB -Inactive(anon): 0 kB -Active(file): 2212 kB -Inactive(file): 2216 kB -Unevictable: 0 kB -Mlocked: 0 kB -SwapTotal: 0 kB -SwapFree: 0 kB -Dirty: 0 kB -Writeback: 0 kB -AnonPages: 8600 kB -Mapped: 4428 kB -Shmem: 0 kB - -``` - -Notable divergences: - -Field name | Notes -:---------------- | :----------------------------------------------------- -Buffers | Always zero, no block devices -SwapCache | Always zero, no swap -Inactive(anon) | Always zero, see SwapCache -Unevictable | Always zero TODO(b/31823263) -Mlocked | Always zero TODO(b/31823263) -SwapTotal | Always zero, no swap -SwapFree | Always zero, no swap -Dirty | Always zero TODO(b/31823263) -Writeback | Always zero TODO(b/31823263) -MemAvailable | Uses the same value as MemFree since there is no swap. -Slab | Missing -SReclaimable | Missing -SUnreclaim | Missing -KernelStack | Missing -PageTables | Missing -NFS_Unstable | Missing -Bounce | Missing -WritebackTmp | Missing -CommitLimit | Missing -Committed_AS | Missing -VmallocTotal | Missing -VmallocUsed | Missing -VmallocChunk | Missing -HardwareCorrupted | Missing -AnonHugePages | Missing -ShmemHugePages | Missing -ShmemPmdMapped | Missing -HugePages_Total | Missing -HugePages_Free | Missing -HugePages_Rsvd | Missing -HugePages_Surp | Missing -Hugepagesize | Missing -DirectMap4k | Missing -DirectMap2M | Missing -DirectMap1G | Missing - -### stat - -```bash -$ cat /proc/stat -cpu 0 0 0 0 0 0 0 0 0 0 -cpu0 0 0 0 0 0 0 0 0 0 0 -cpu1 0 0 0 0 0 0 0 0 0 0 -cpu2 0 0 0 0 0 0 0 0 0 0 -cpu3 0 0 0 0 0 0 0 0 0 0 -cpu4 0 0 0 0 0 0 0 0 0 0 -cpu5 0 0 0 0 0 0 0 0 0 0 -cpu6 0 0 0 0 0 0 0 0 0 0 -cpu7 0 0 0 0 0 0 0 0 0 0 -intr 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -ctxt 0 -btime 1504040968 -processes 0 -procs_running 0 -procs_blokkcked 0 -softirq 0 0 0 0 0 0 0 0 0 0 0 -``` - -All fields except for `btime` are always zero. - -TODO(b/37226836): Populate with accurate fields. - -### sys - -```bash -$ ls /proc/sys -kernel vm -``` - -Directory | Notes -:-------- | :---------------------------- -abi | Missing -debug | Missing -dev | Missing -fs | Missing -kernel | Contains hostname (only) -net | Missing -user | Missing -vm | Contains mmap_min_addr (only) - -### uptime - -```bash -$ cat /proc/uptime -3204.62 0.00 -``` - -Column | Notes -:------------------------------- | :---------------------------- -Total num seconds system running | Time since procfs was mounted -Number of seconds idle | Always zero - -### version - -```bash -$ cat /proc/version -Linux version 4.4 #1 SMP Sun Jan 10 15:06:54 PST 2016 -``` - -## Process-specific data - -The following files are implemented: - -File /proc/PID | Content -:---------------------- | :--------------------------------------------------- -[auxv](#auxv) | Copy of auxiliary vector for the process -[cmdline](#cmdline) | Command line arguments -[comm](#comm) | Command name associated with the process -[environ](#environ) | Process environment -[exe](#exe) | Symlink to the process's executable -[fd](#fd) | Directory containing links to open file descriptors -[fdinfo](#fdinfo) | Information associated with open file descriptors -[gid_map](#gid_map) | Mappings for group IDs inside the user namespace -[io](#io) | IO statistics -[maps](#maps) | Memory mappings (anon, executables, library files) -[mounts](#mounts) | Mounted filesystems -[mountinfo](#mountinfo) | Information about mounts -[ns](#ns) | Directory containing info about supported namespaces -[stat](#stat) | Process statistics -[statm](#statm) | Process memory statistics -[status](#status) | Process status in human readable format -[task](#task) | Directory containing info about running threads -[uid_map](#uid_map) | Mappings for user IDs inside the user namespace - -### auxv - -TODO - -### cmdline - -TODO - -### comm - -TODO - -### environment - -TODO - -### exe - -TODO - -### fd - -TODO - -### fdinfo - -TODO - -### gid_map - -TODO - -### io - -Only has data for rchar, wchar, syscr, and syscw. - -TODO: add more detail. - -### maps - -TODO - -### mounts - -TODO - -### mountinfo - -TODO - -### ns - -TODO - -### stat - -Only has data for pid, comm, state, ppid, utime, stime, cutime, cstime, -num_threads, and exit_signal. - -TODO: add more detail. - -### statm - -Only has data for vss and rss. - -TODO: add more detail. - -### status - -Contains data for Name, State, Tgid, Pid, Ppid, TracerPid, FDSize, VmSize, -VmRSS, Threads, CapInh, CapPrm, CapEff, CapBnd, Seccomp. - -TODO: add more detail. - -### task - -TODO - -### uid_map - -TODO diff --git a/pkg/sentry/fs/proc/device/BUILD b/pkg/sentry/fs/proc/device/BUILD deleted file mode 100644 index 52c9aa93d..000000000 --- a/pkg/sentry/fs/proc/device/BUILD +++ /dev/null @@ -1,10 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "device", - srcs = ["device.go"], - visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/sentry/device"], -) diff --git a/pkg/sentry/fs/proc/device/device_state_autogen.go b/pkg/sentry/fs/proc/device/device_state_autogen.go new file mode 100644 index 000000000..4a5e3cc88 --- /dev/null +++ b/pkg/sentry/fs/proc/device/device_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package device diff --git a/pkg/sentry/fs/proc/net_test.go b/pkg/sentry/fs/proc/net_test.go deleted file mode 100644 index f18681405..000000000 --- a/pkg/sentry/fs/proc/net_test.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/inet" -) - -func newIPv6TestStack() *inet.TestStack { - s := inet.NewTestStack() - s.SupportsIPv6Flag = true - return s -} - -func TestIfinet6NoAddresses(t *testing.T) { - n := &ifinet6{s: newIPv6TestStack()} - if got := n.contents(); got != nil { - t.Errorf("Got n.contents() = %v, want = %v", got, nil) - } -} - -func TestIfinet6(t *testing.T) { - s := newIPv6TestStack() - s.InterfacesMap[1] = inet.Interface{Name: "eth0"} - s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - }, - } - s.InterfacesMap[2] = inet.Interface{Name: "eth1"} - s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"), - }, - } - want := map[string]struct{}{ - "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {}, - "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {}, - } - - n := &ifinet6{s: s} - contents := n.contents() - if len(contents) != len(want) { - t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want)) - } - got := map[string]struct{}{} - for _, l := range contents { - got[l] = struct{}{} - } - - if !reflect.DeepEqual(got, want) { - t.Errorf("Got n.contents() = %v, want = %v", got, want) - } -} diff --git a/pkg/sentry/fs/proc/proc_state_autogen.go b/pkg/sentry/fs/proc/proc_state_autogen.go new file mode 100644 index 000000000..8046c8bf9 --- /dev/null +++ b/pkg/sentry/fs/proc/proc_state_autogen.go @@ -0,0 +1,1835 @@ +// automatically generated by stateify. + +package proc + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (i *execArgInode) StateTypeName() string { + return "pkg/sentry/fs/proc.execArgInode" +} + +func (i *execArgInode) StateFields() []string { + return []string{ + "SimpleFileInode", + "arg", + "t", + } +} + +func (i *execArgInode) beforeSave() {} + +// +checklocksignore +func (i *execArgInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.SimpleFileInode) + stateSinkObject.Save(1, &i.arg) + stateSinkObject.Save(2, &i.t) +} + +func (i *execArgInode) afterLoad() {} + +// +checklocksignore +func (i *execArgInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.SimpleFileInode) + stateSourceObject.Load(1, &i.arg) + stateSourceObject.Load(2, &i.t) +} + +func (f *execArgFile) StateTypeName() string { + return "pkg/sentry/fs/proc.execArgFile" +} + +func (f *execArgFile) StateFields() []string { + return []string{ + "arg", + "t", + } +} + +func (f *execArgFile) beforeSave() {} + +// +checklocksignore +func (f *execArgFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.arg) + stateSinkObject.Save(1, &f.t) +} + +func (f *execArgFile) afterLoad() {} + +// +checklocksignore +func (f *execArgFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.arg) + stateSourceObject.Load(1, &f.t) +} + +func (f *fdDir) StateTypeName() string { + return "pkg/sentry/fs/proc.fdDir" +} + +func (f *fdDir) StateFields() []string { + return []string{ + "Dir", + "t", + } +} + +func (f *fdDir) beforeSave() {} + +// +checklocksignore +func (f *fdDir) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.Dir) + stateSinkObject.Save(1, &f.t) +} + +func (f *fdDir) afterLoad() {} + +// +checklocksignore +func (f *fdDir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.Dir) + stateSourceObject.Load(1, &f.t) +} + +func (f *fdDirFile) StateTypeName() string { + return "pkg/sentry/fs/proc.fdDirFile" +} + +func (f *fdDirFile) StateFields() []string { + return []string{ + "isInfoFile", + "t", + } +} + +func (f *fdDirFile) beforeSave() {} + +// +checklocksignore +func (f *fdDirFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.isInfoFile) + stateSinkObject.Save(1, &f.t) +} + +func (f *fdDirFile) afterLoad() {} + +// +checklocksignore +func (f *fdDirFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.isInfoFile) + stateSourceObject.Load(1, &f.t) +} + +func (fdid *fdInfoDir) StateTypeName() string { + return "pkg/sentry/fs/proc.fdInfoDir" +} + +func (fdid *fdInfoDir) StateFields() []string { + return []string{ + "Dir", + "t", + } +} + +func (fdid *fdInfoDir) beforeSave() {} + +// +checklocksignore +func (fdid *fdInfoDir) StateSave(stateSinkObject state.Sink) { + fdid.beforeSave() + stateSinkObject.Save(0, &fdid.Dir) + stateSinkObject.Save(1, &fdid.t) +} + +func (fdid *fdInfoDir) afterLoad() {} + +// +checklocksignore +func (fdid *fdInfoDir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fdid.Dir) + stateSourceObject.Load(1, &fdid.t) +} + +func (f *filesystemsData) StateTypeName() string { + return "pkg/sentry/fs/proc.filesystemsData" +} + +func (f *filesystemsData) StateFields() []string { + return []string{} +} + +func (f *filesystemsData) beforeSave() {} + +// +checklocksignore +func (f *filesystemsData) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystemsData) afterLoad() {} + +// +checklocksignore +func (f *filesystemsData) StateLoad(stateSourceObject state.Source) { +} + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/proc.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +// +checklocksignore +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +// +checklocksignore +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (i *taskOwnedInodeOps) StateTypeName() string { + return "pkg/sentry/fs/proc.taskOwnedInodeOps" +} + +func (i *taskOwnedInodeOps) StateFields() []string { + return []string{ + "InodeOperations", + "t", + } +} + +func (i *taskOwnedInodeOps) beforeSave() {} + +// +checklocksignore +func (i *taskOwnedInodeOps) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeOperations) + stateSinkObject.Save(1, &i.t) +} + +func (i *taskOwnedInodeOps) afterLoad() {} + +// +checklocksignore +func (i *taskOwnedInodeOps) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeOperations) + stateSourceObject.Load(1, &i.t) +} + +func (s *staticFileInodeOps) StateTypeName() string { + return "pkg/sentry/fs/proc.staticFileInodeOps" +} + +func (s *staticFileInodeOps) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeStaticFileGetter", + } +} + +func (s *staticFileInodeOps) beforeSave() {} + +// +checklocksignore +func (s *staticFileInodeOps) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeSimpleAttributes) + stateSinkObject.Save(1, &s.InodeStaticFileGetter) +} + +func (s *staticFileInodeOps) afterLoad() {} + +// +checklocksignore +func (s *staticFileInodeOps) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeSimpleAttributes) + stateSourceObject.Load(1, &s.InodeStaticFileGetter) +} + +func (d *loadavgData) StateTypeName() string { + return "pkg/sentry/fs/proc.loadavgData" +} + +func (d *loadavgData) StateFields() []string { + return []string{} +} + +func (d *loadavgData) beforeSave() {} + +// +checklocksignore +func (d *loadavgData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() +} + +func (d *loadavgData) afterLoad() {} + +// +checklocksignore +func (d *loadavgData) StateLoad(stateSourceObject state.Source) { +} + +func (d *meminfoData) StateTypeName() string { + return "pkg/sentry/fs/proc.meminfoData" +} + +func (d *meminfoData) StateFields() []string { + return []string{ + "k", + } +} + +func (d *meminfoData) beforeSave() {} + +// +checklocksignore +func (d *meminfoData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.k) +} + +func (d *meminfoData) afterLoad() {} + +// +checklocksignore +func (d *meminfoData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.k) +} + +func (mif *mountInfoFile) StateTypeName() string { + return "pkg/sentry/fs/proc.mountInfoFile" +} + +func (mif *mountInfoFile) StateFields() []string { + return []string{ + "t", + } +} + +func (mif *mountInfoFile) beforeSave() {} + +// +checklocksignore +func (mif *mountInfoFile) StateSave(stateSinkObject state.Sink) { + mif.beforeSave() + stateSinkObject.Save(0, &mif.t) +} + +func (mif *mountInfoFile) afterLoad() {} + +// +checklocksignore +func (mif *mountInfoFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mif.t) +} + +func (mf *mountsFile) StateTypeName() string { + return "pkg/sentry/fs/proc.mountsFile" +} + +func (mf *mountsFile) StateFields() []string { + return []string{ + "t", + } +} + +func (mf *mountsFile) beforeSave() {} + +// +checklocksignore +func (mf *mountsFile) StateSave(stateSinkObject state.Sink) { + mf.beforeSave() + stateSinkObject.Save(0, &mf.t) +} + +func (mf *mountsFile) afterLoad() {} + +// +checklocksignore +func (mf *mountsFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mf.t) +} + +func (n *ifinet6) StateTypeName() string { + return "pkg/sentry/fs/proc.ifinet6" +} + +func (n *ifinet6) StateFields() []string { + return []string{ + "s", + } +} + +func (n *ifinet6) beforeSave() {} + +// +checklocksignore +func (n *ifinet6) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.s) +} + +func (n *ifinet6) afterLoad() {} + +// +checklocksignore +func (n *ifinet6) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.s) +} + +func (n *netDev) StateTypeName() string { + return "pkg/sentry/fs/proc.netDev" +} + +func (n *netDev) StateFields() []string { + return []string{ + "s", + } +} + +func (n *netDev) beforeSave() {} + +// +checklocksignore +func (n *netDev) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.s) +} + +func (n *netDev) afterLoad() {} + +// +checklocksignore +func (n *netDev) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.s) +} + +func (n *netSnmp) StateTypeName() string { + return "pkg/sentry/fs/proc.netSnmp" +} + +func (n *netSnmp) StateFields() []string { + return []string{ + "s", + } +} + +func (n *netSnmp) beforeSave() {} + +// +checklocksignore +func (n *netSnmp) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.s) +} + +func (n *netSnmp) afterLoad() {} + +// +checklocksignore +func (n *netSnmp) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.s) +} + +func (n *netRoute) StateTypeName() string { + return "pkg/sentry/fs/proc.netRoute" +} + +func (n *netRoute) StateFields() []string { + return []string{ + "s", + } +} + +func (n *netRoute) beforeSave() {} + +// +checklocksignore +func (n *netRoute) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.s) +} + +func (n *netRoute) afterLoad() {} + +// +checklocksignore +func (n *netRoute) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.s) +} + +func (n *netUnix) StateTypeName() string { + return "pkg/sentry/fs/proc.netUnix" +} + +func (n *netUnix) StateFields() []string { + return []string{ + "k", + } +} + +func (n *netUnix) beforeSave() {} + +// +checklocksignore +func (n *netUnix) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.k) +} + +func (n *netUnix) afterLoad() {} + +// +checklocksignore +func (n *netUnix) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.k) +} + +func (n *netTCP) StateTypeName() string { + return "pkg/sentry/fs/proc.netTCP" +} + +func (n *netTCP) StateFields() []string { + return []string{ + "k", + } +} + +func (n *netTCP) beforeSave() {} + +// +checklocksignore +func (n *netTCP) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.k) +} + +func (n *netTCP) afterLoad() {} + +// +checklocksignore +func (n *netTCP) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.k) +} + +func (n *netTCP6) StateTypeName() string { + return "pkg/sentry/fs/proc.netTCP6" +} + +func (n *netTCP6) StateFields() []string { + return []string{ + "k", + } +} + +func (n *netTCP6) beforeSave() {} + +// +checklocksignore +func (n *netTCP6) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.k) +} + +func (n *netTCP6) afterLoad() {} + +// +checklocksignore +func (n *netTCP6) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.k) +} + +func (n *netUDP) StateTypeName() string { + return "pkg/sentry/fs/proc.netUDP" +} + +func (n *netUDP) StateFields() []string { + return []string{ + "k", + } +} + +func (n *netUDP) beforeSave() {} + +// +checklocksignore +func (n *netUDP) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.k) +} + +func (n *netUDP) afterLoad() {} + +// +checklocksignore +func (n *netUDP) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.k) +} + +func (p *proc) StateTypeName() string { + return "pkg/sentry/fs/proc.proc" +} + +func (p *proc) StateFields() []string { + return []string{ + "Dir", + "k", + "pidns", + "cgroupControllers", + } +} + +func (p *proc) beforeSave() {} + +// +checklocksignore +func (p *proc) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.Dir) + stateSinkObject.Save(1, &p.k) + stateSinkObject.Save(2, &p.pidns) + stateSinkObject.Save(3, &p.cgroupControllers) +} + +func (p *proc) afterLoad() {} + +// +checklocksignore +func (p *proc) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.Dir) + stateSourceObject.Load(1, &p.k) + stateSourceObject.Load(2, &p.pidns) + stateSourceObject.Load(3, &p.cgroupControllers) +} + +func (s *self) StateTypeName() string { + return "pkg/sentry/fs/proc.self" +} + +func (s *self) StateFields() []string { + return []string{ + "Symlink", + "pidns", + } +} + +func (s *self) beforeSave() {} + +// +checklocksignore +func (s *self) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Symlink) + stateSinkObject.Save(1, &s.pidns) +} + +func (s *self) afterLoad() {} + +// +checklocksignore +func (s *self) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Symlink) + stateSourceObject.Load(1, &s.pidns) +} + +func (s *threadSelf) StateTypeName() string { + return "pkg/sentry/fs/proc.threadSelf" +} + +func (s *threadSelf) StateFields() []string { + return []string{ + "Symlink", + "pidns", + } +} + +func (s *threadSelf) beforeSave() {} + +// +checklocksignore +func (s *threadSelf) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Symlink) + stateSinkObject.Save(1, &s.pidns) +} + +func (s *threadSelf) afterLoad() {} + +// +checklocksignore +func (s *threadSelf) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Symlink) + stateSourceObject.Load(1, &s.pidns) +} + +func (rpf *rootProcFile) StateTypeName() string { + return "pkg/sentry/fs/proc.rootProcFile" +} + +func (rpf *rootProcFile) StateFields() []string { + return []string{ + "iops", + } +} + +func (rpf *rootProcFile) beforeSave() {} + +// +checklocksignore +func (rpf *rootProcFile) StateSave(stateSinkObject state.Sink) { + rpf.beforeSave() + stateSinkObject.Save(0, &rpf.iops) +} + +func (rpf *rootProcFile) afterLoad() {} + +// +checklocksignore +func (rpf *rootProcFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &rpf.iops) +} + +func (s *statData) StateTypeName() string { + return "pkg/sentry/fs/proc.statData" +} + +func (s *statData) StateFields() []string { + return []string{ + "k", + } +} + +func (s *statData) beforeSave() {} + +// +checklocksignore +func (s *statData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.k) +} + +func (s *statData) afterLoad() {} + +// +checklocksignore +func (s *statData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.k) +} + +func (d *mmapMinAddrData) StateTypeName() string { + return "pkg/sentry/fs/proc.mmapMinAddrData" +} + +func (d *mmapMinAddrData) StateFields() []string { + return []string{ + "k", + } +} + +func (d *mmapMinAddrData) beforeSave() {} + +// +checklocksignore +func (d *mmapMinAddrData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.k) +} + +func (d *mmapMinAddrData) afterLoad() {} + +// +checklocksignore +func (d *mmapMinAddrData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.k) +} + +func (o *overcommitMemory) StateTypeName() string { + return "pkg/sentry/fs/proc.overcommitMemory" +} + +func (o *overcommitMemory) StateFields() []string { + return []string{} +} + +func (o *overcommitMemory) beforeSave() {} + +// +checklocksignore +func (o *overcommitMemory) StateSave(stateSinkObject state.Sink) { + o.beforeSave() +} + +func (o *overcommitMemory) afterLoad() {} + +// +checklocksignore +func (o *overcommitMemory) StateLoad(stateSourceObject state.Source) { +} + +func (m *maxMapCount) StateTypeName() string { + return "pkg/sentry/fs/proc.maxMapCount" +} + +func (m *maxMapCount) StateFields() []string { + return []string{} +} + +func (m *maxMapCount) beforeSave() {} + +// +checklocksignore +func (m *maxMapCount) StateSave(stateSinkObject state.Sink) { + m.beforeSave() +} + +func (m *maxMapCount) afterLoad() {} + +// +checklocksignore +func (m *maxMapCount) StateLoad(stateSourceObject state.Source) { +} + +func (h *hostname) StateTypeName() string { + return "pkg/sentry/fs/proc.hostname" +} + +func (h *hostname) StateFields() []string { + return []string{ + "SimpleFileInode", + } +} + +func (h *hostname) beforeSave() {} + +// +checklocksignore +func (h *hostname) StateSave(stateSinkObject state.Sink) { + h.beforeSave() + stateSinkObject.Save(0, &h.SimpleFileInode) +} + +func (h *hostname) afterLoad() {} + +// +checklocksignore +func (h *hostname) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &h.SimpleFileInode) +} + +func (hf *hostnameFile) StateTypeName() string { + return "pkg/sentry/fs/proc.hostnameFile" +} + +func (hf *hostnameFile) StateFields() []string { + return []string{} +} + +func (hf *hostnameFile) beforeSave() {} + +// +checklocksignore +func (hf *hostnameFile) StateSave(stateSinkObject state.Sink) { + hf.beforeSave() +} + +func (hf *hostnameFile) afterLoad() {} + +// +checklocksignore +func (hf *hostnameFile) StateLoad(stateSourceObject state.Source) { +} + +func (t *tcpMemInode) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpMemInode" +} + +func (t *tcpMemInode) StateFields() []string { + return []string{ + "SimpleFileInode", + "dir", + "s", + "size", + } +} + +// +checklocksignore +func (t *tcpMemInode) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.SimpleFileInode) + stateSinkObject.Save(1, &t.dir) + stateSinkObject.Save(2, &t.s) + stateSinkObject.Save(3, &t.size) +} + +// +checklocksignore +func (t *tcpMemInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.SimpleFileInode) + stateSourceObject.Load(1, &t.dir) + stateSourceObject.LoadWait(2, &t.s) + stateSourceObject.Load(3, &t.size) + stateSourceObject.AfterLoad(t.afterLoad) +} + +func (f *tcpMemFile) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpMemFile" +} + +func (f *tcpMemFile) StateFields() []string { + return []string{ + "tcpMemInode", + } +} + +func (f *tcpMemFile) beforeSave() {} + +// +checklocksignore +func (f *tcpMemFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.tcpMemInode) +} + +func (f *tcpMemFile) afterLoad() {} + +// +checklocksignore +func (f *tcpMemFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.tcpMemInode) +} + +func (s *tcpSack) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpSack" +} + +func (s *tcpSack) StateFields() []string { + return []string{ + "SimpleFileInode", + "stack", + "enabled", + } +} + +func (s *tcpSack) beforeSave() {} + +// +checklocksignore +func (s *tcpSack) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.SimpleFileInode) + stateSinkObject.Save(1, &s.stack) + stateSinkObject.Save(2, &s.enabled) +} + +// +checklocksignore +func (s *tcpSack) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.SimpleFileInode) + stateSourceObject.LoadWait(1, &s.stack) + stateSourceObject.Load(2, &s.enabled) + stateSourceObject.AfterLoad(s.afterLoad) +} + +func (f *tcpSackFile) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpSackFile" +} + +func (f *tcpSackFile) StateFields() []string { + return []string{ + "tcpSack", + "stack", + } +} + +func (f *tcpSackFile) beforeSave() {} + +// +checklocksignore +func (f *tcpSackFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.tcpSack) + stateSinkObject.Save(1, &f.stack) +} + +func (f *tcpSackFile) afterLoad() {} + +// +checklocksignore +func (f *tcpSackFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.tcpSack) + stateSourceObject.LoadWait(1, &f.stack) +} + +func (r *tcpRecovery) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpRecovery" +} + +func (r *tcpRecovery) StateFields() []string { + return []string{ + "SimpleFileInode", + "stack", + "recovery", + } +} + +func (r *tcpRecovery) beforeSave() {} + +// +checklocksignore +func (r *tcpRecovery) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.SimpleFileInode) + stateSinkObject.Save(1, &r.stack) + stateSinkObject.Save(2, &r.recovery) +} + +func (r *tcpRecovery) afterLoad() {} + +// +checklocksignore +func (r *tcpRecovery) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.SimpleFileInode) + stateSourceObject.LoadWait(1, &r.stack) + stateSourceObject.Load(2, &r.recovery) +} + +func (f *tcpRecoveryFile) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpRecoveryFile" +} + +func (f *tcpRecoveryFile) StateFields() []string { + return []string{ + "tcpRecovery", + "stack", + } +} + +func (f *tcpRecoveryFile) beforeSave() {} + +// +checklocksignore +func (f *tcpRecoveryFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.tcpRecovery) + stateSinkObject.Save(1, &f.stack) +} + +func (f *tcpRecoveryFile) afterLoad() {} + +// +checklocksignore +func (f *tcpRecoveryFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.tcpRecovery) + stateSourceObject.LoadWait(1, &f.stack) +} + +func (ipf *ipForwarding) StateTypeName() string { + return "pkg/sentry/fs/proc.ipForwarding" +} + +func (ipf *ipForwarding) StateFields() []string { + return []string{ + "SimpleFileInode", + "stack", + "enabled", + } +} + +func (ipf *ipForwarding) beforeSave() {} + +// +checklocksignore +func (ipf *ipForwarding) StateSave(stateSinkObject state.Sink) { + ipf.beforeSave() + stateSinkObject.Save(0, &ipf.SimpleFileInode) + stateSinkObject.Save(1, &ipf.stack) + stateSinkObject.Save(2, &ipf.enabled) +} + +// +checklocksignore +func (ipf *ipForwarding) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ipf.SimpleFileInode) + stateSourceObject.LoadWait(1, &ipf.stack) + stateSourceObject.Load(2, &ipf.enabled) + stateSourceObject.AfterLoad(ipf.afterLoad) +} + +func (f *ipForwardingFile) StateTypeName() string { + return "pkg/sentry/fs/proc.ipForwardingFile" +} + +func (f *ipForwardingFile) StateFields() []string { + return []string{ + "ipf", + "stack", + } +} + +func (f *ipForwardingFile) beforeSave() {} + +// +checklocksignore +func (f *ipForwardingFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.ipf) + stateSinkObject.Save(1, &f.stack) +} + +func (f *ipForwardingFile) afterLoad() {} + +// +checklocksignore +func (f *ipForwardingFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.ipf) + stateSourceObject.LoadWait(1, &f.stack) +} + +func (in *portRangeInode) StateTypeName() string { + return "pkg/sentry/fs/proc.portRangeInode" +} + +func (in *portRangeInode) StateFields() []string { + return []string{ + "SimpleFileInode", + "stack", + "start", + "end", + } +} + +func (in *portRangeInode) beforeSave() {} + +// +checklocksignore +func (in *portRangeInode) StateSave(stateSinkObject state.Sink) { + in.beforeSave() + stateSinkObject.Save(0, &in.SimpleFileInode) + stateSinkObject.Save(1, &in.stack) + stateSinkObject.Save(2, &in.start) + stateSinkObject.Save(3, &in.end) +} + +func (in *portRangeInode) afterLoad() {} + +// +checklocksignore +func (in *portRangeInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &in.SimpleFileInode) + stateSourceObject.LoadWait(1, &in.stack) + stateSourceObject.Load(2, &in.start) + stateSourceObject.Load(3, &in.end) +} + +func (pf *portRangeFile) StateTypeName() string { + return "pkg/sentry/fs/proc.portRangeFile" +} + +func (pf *portRangeFile) StateFields() []string { + return []string{ + "inode", + } +} + +func (pf *portRangeFile) beforeSave() {} + +// +checklocksignore +func (pf *portRangeFile) StateSave(stateSinkObject state.Sink) { + pf.beforeSave() + stateSinkObject.Save(0, &pf.inode) +} + +func (pf *portRangeFile) afterLoad() {} + +// +checklocksignore +func (pf *portRangeFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &pf.inode) +} + +func (t *taskDir) StateTypeName() string { + return "pkg/sentry/fs/proc.taskDir" +} + +func (t *taskDir) StateFields() []string { + return []string{ + "Dir", + "t", + } +} + +func (t *taskDir) beforeSave() {} + +// +checklocksignore +func (t *taskDir) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.Dir) + stateSinkObject.Save(1, &t.t) +} + +func (t *taskDir) afterLoad() {} + +// +checklocksignore +func (t *taskDir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.Dir) + stateSourceObject.Load(1, &t.t) +} + +func (s *subtasks) StateTypeName() string { + return "pkg/sentry/fs/proc.subtasks" +} + +func (s *subtasks) StateFields() []string { + return []string{ + "Dir", + "t", + "p", + } +} + +func (s *subtasks) beforeSave() {} + +// +checklocksignore +func (s *subtasks) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Dir) + stateSinkObject.Save(1, &s.t) + stateSinkObject.Save(2, &s.p) +} + +func (s *subtasks) afterLoad() {} + +// +checklocksignore +func (s *subtasks) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Dir) + stateSourceObject.Load(1, &s.t) + stateSourceObject.Load(2, &s.p) +} + +func (f *subtasksFile) StateTypeName() string { + return "pkg/sentry/fs/proc.subtasksFile" +} + +func (f *subtasksFile) StateFields() []string { + return []string{ + "t", + "pidns", + } +} + +func (f *subtasksFile) beforeSave() {} + +// +checklocksignore +func (f *subtasksFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.t) + stateSinkObject.Save(1, &f.pidns) +} + +func (f *subtasksFile) afterLoad() {} + +// +checklocksignore +func (f *subtasksFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.t) + stateSourceObject.Load(1, &f.pidns) +} + +func (e *exe) StateTypeName() string { + return "pkg/sentry/fs/proc.exe" +} + +func (e *exe) StateFields() []string { + return []string{ + "Symlink", + "t", + } +} + +func (e *exe) beforeSave() {} + +// +checklocksignore +func (e *exe) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.Symlink) + stateSinkObject.Save(1, &e.t) +} + +func (e *exe) afterLoad() {} + +// +checklocksignore +func (e *exe) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.Symlink) + stateSourceObject.Load(1, &e.t) +} + +func (e *cwd) StateTypeName() string { + return "pkg/sentry/fs/proc.cwd" +} + +func (e *cwd) StateFields() []string { + return []string{ + "Symlink", + "t", + } +} + +func (e *cwd) beforeSave() {} + +// +checklocksignore +func (e *cwd) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.Symlink) + stateSinkObject.Save(1, &e.t) +} + +func (e *cwd) afterLoad() {} + +// +checklocksignore +func (e *cwd) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.Symlink) + stateSourceObject.Load(1, &e.t) +} + +func (n *namespaceSymlink) StateTypeName() string { + return "pkg/sentry/fs/proc.namespaceSymlink" +} + +func (n *namespaceSymlink) StateFields() []string { + return []string{ + "Symlink", + "t", + } +} + +func (n *namespaceSymlink) beforeSave() {} + +// +checklocksignore +func (n *namespaceSymlink) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.Symlink) + stateSinkObject.Save(1, &n.t) +} + +func (n *namespaceSymlink) afterLoad() {} + +// +checklocksignore +func (n *namespaceSymlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.Symlink) + stateSourceObject.Load(1, &n.t) +} + +func (m *memData) StateTypeName() string { + return "pkg/sentry/fs/proc.memData" +} + +func (m *memData) StateFields() []string { + return []string{ + "SimpleFileInode", + "t", + } +} + +func (m *memData) beforeSave() {} + +// +checklocksignore +func (m *memData) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.SimpleFileInode) + stateSinkObject.Save(1, &m.t) +} + +func (m *memData) afterLoad() {} + +// +checklocksignore +func (m *memData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.SimpleFileInode) + stateSourceObject.Load(1, &m.t) +} + +func (m *memDataFile) StateTypeName() string { + return "pkg/sentry/fs/proc.memDataFile" +} + +func (m *memDataFile) StateFields() []string { + return []string{ + "t", + } +} + +func (m *memDataFile) beforeSave() {} + +// +checklocksignore +func (m *memDataFile) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.t) +} + +func (m *memDataFile) afterLoad() {} + +// +checklocksignore +func (m *memDataFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.t) +} + +func (md *mapsData) StateTypeName() string { + return "pkg/sentry/fs/proc.mapsData" +} + +func (md *mapsData) StateFields() []string { + return []string{ + "t", + } +} + +func (md *mapsData) beforeSave() {} + +// +checklocksignore +func (md *mapsData) StateSave(stateSinkObject state.Sink) { + md.beforeSave() + stateSinkObject.Save(0, &md.t) +} + +func (md *mapsData) afterLoad() {} + +// +checklocksignore +func (md *mapsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &md.t) +} + +func (sd *smapsData) StateTypeName() string { + return "pkg/sentry/fs/proc.smapsData" +} + +func (sd *smapsData) StateFields() []string { + return []string{ + "t", + } +} + +func (sd *smapsData) beforeSave() {} + +// +checklocksignore +func (sd *smapsData) StateSave(stateSinkObject state.Sink) { + sd.beforeSave() + stateSinkObject.Save(0, &sd.t) +} + +func (sd *smapsData) afterLoad() {} + +// +checklocksignore +func (sd *smapsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sd.t) +} + +func (s *taskStatData) StateTypeName() string { + return "pkg/sentry/fs/proc.taskStatData" +} + +func (s *taskStatData) StateFields() []string { + return []string{ + "t", + "tgstats", + "pidns", + } +} + +func (s *taskStatData) beforeSave() {} + +// +checklocksignore +func (s *taskStatData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.t) + stateSinkObject.Save(1, &s.tgstats) + stateSinkObject.Save(2, &s.pidns) +} + +func (s *taskStatData) afterLoad() {} + +// +checklocksignore +func (s *taskStatData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.t) + stateSourceObject.Load(1, &s.tgstats) + stateSourceObject.Load(2, &s.pidns) +} + +func (s *statmData) StateTypeName() string { + return "pkg/sentry/fs/proc.statmData" +} + +func (s *statmData) StateFields() []string { + return []string{ + "t", + } +} + +func (s *statmData) beforeSave() {} + +// +checklocksignore +func (s *statmData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.t) +} + +func (s *statmData) afterLoad() {} + +// +checklocksignore +func (s *statmData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.t) +} + +func (s *statusData) StateTypeName() string { + return "pkg/sentry/fs/proc.statusData" +} + +func (s *statusData) StateFields() []string { + return []string{ + "t", + "pidns", + } +} + +func (s *statusData) beforeSave() {} + +// +checklocksignore +func (s *statusData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.t) + stateSinkObject.Save(1, &s.pidns) +} + +func (s *statusData) afterLoad() {} + +// +checklocksignore +func (s *statusData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.t) + stateSourceObject.Load(1, &s.pidns) +} + +func (i *ioData) StateTypeName() string { + return "pkg/sentry/fs/proc.ioData" +} + +func (i *ioData) StateFields() []string { + return []string{ + "ioUsage", + } +} + +func (i *ioData) beforeSave() {} + +// +checklocksignore +func (i *ioData) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.ioUsage) +} + +func (i *ioData) afterLoad() {} + +// +checklocksignore +func (i *ioData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.ioUsage) +} + +func (c *comm) StateTypeName() string { + return "pkg/sentry/fs/proc.comm" +} + +func (c *comm) StateFields() []string { + return []string{ + "SimpleFileInode", + "t", + } +} + +func (c *comm) beforeSave() {} + +// +checklocksignore +func (c *comm) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.SimpleFileInode) + stateSinkObject.Save(1, &c.t) +} + +func (c *comm) afterLoad() {} + +// +checklocksignore +func (c *comm) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.SimpleFileInode) + stateSourceObject.Load(1, &c.t) +} + +func (f *commFile) StateTypeName() string { + return "pkg/sentry/fs/proc.commFile" +} + +func (f *commFile) StateFields() []string { + return []string{ + "t", + } +} + +func (f *commFile) beforeSave() {} + +// +checklocksignore +func (f *commFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.t) +} + +func (f *commFile) afterLoad() {} + +// +checklocksignore +func (f *commFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.t) +} + +func (a *auxvec) StateTypeName() string { + return "pkg/sentry/fs/proc.auxvec" +} + +func (a *auxvec) StateFields() []string { + return []string{ + "SimpleFileInode", + "t", + } +} + +func (a *auxvec) beforeSave() {} + +// +checklocksignore +func (a *auxvec) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.SimpleFileInode) + stateSinkObject.Save(1, &a.t) +} + +func (a *auxvec) afterLoad() {} + +// +checklocksignore +func (a *auxvec) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.SimpleFileInode) + stateSourceObject.Load(1, &a.t) +} + +func (f *auxvecFile) StateTypeName() string { + return "pkg/sentry/fs/proc.auxvecFile" +} + +func (f *auxvecFile) StateFields() []string { + return []string{ + "t", + } +} + +func (f *auxvecFile) beforeSave() {} + +// +checklocksignore +func (f *auxvecFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.t) +} + +func (f *auxvecFile) afterLoad() {} + +// +checklocksignore +func (f *auxvecFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.t) +} + +func (o *oomScoreAdj) StateTypeName() string { + return "pkg/sentry/fs/proc.oomScoreAdj" +} + +func (o *oomScoreAdj) StateFields() []string { + return []string{ + "SimpleFileInode", + "t", + } +} + +func (o *oomScoreAdj) beforeSave() {} + +// +checklocksignore +func (o *oomScoreAdj) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.SimpleFileInode) + stateSinkObject.Save(1, &o.t) +} + +func (o *oomScoreAdj) afterLoad() {} + +// +checklocksignore +func (o *oomScoreAdj) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.SimpleFileInode) + stateSourceObject.Load(1, &o.t) +} + +func (f *oomScoreAdjFile) StateTypeName() string { + return "pkg/sentry/fs/proc.oomScoreAdjFile" +} + +func (f *oomScoreAdjFile) StateFields() []string { + return []string{ + "t", + } +} + +func (f *oomScoreAdjFile) beforeSave() {} + +// +checklocksignore +func (f *oomScoreAdjFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.t) +} + +func (f *oomScoreAdjFile) afterLoad() {} + +// +checklocksignore +func (f *oomScoreAdjFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.t) +} + +func (imio *idMapInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/proc.idMapInodeOperations" +} + +func (imio *idMapInodeOperations) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeSimpleExtendedAttributes", + "t", + "gids", + } +} + +func (imio *idMapInodeOperations) beforeSave() {} + +// +checklocksignore +func (imio *idMapInodeOperations) StateSave(stateSinkObject state.Sink) { + imio.beforeSave() + stateSinkObject.Save(0, &imio.InodeSimpleAttributes) + stateSinkObject.Save(1, &imio.InodeSimpleExtendedAttributes) + stateSinkObject.Save(2, &imio.t) + stateSinkObject.Save(3, &imio.gids) +} + +func (imio *idMapInodeOperations) afterLoad() {} + +// +checklocksignore +func (imio *idMapInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &imio.InodeSimpleAttributes) + stateSourceObject.Load(1, &imio.InodeSimpleExtendedAttributes) + stateSourceObject.Load(2, &imio.t) + stateSourceObject.Load(3, &imio.gids) +} + +func (imfo *idMapFileOperations) StateTypeName() string { + return "pkg/sentry/fs/proc.idMapFileOperations" +} + +func (imfo *idMapFileOperations) StateFields() []string { + return []string{ + "iops", + } +} + +func (imfo *idMapFileOperations) beforeSave() {} + +// +checklocksignore +func (imfo *idMapFileOperations) StateSave(stateSinkObject state.Sink) { + imfo.beforeSave() + stateSinkObject.Save(0, &imfo.iops) +} + +func (imfo *idMapFileOperations) afterLoad() {} + +// +checklocksignore +func (imfo *idMapFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &imfo.iops) +} + +func (u *uptime) StateTypeName() string { + return "pkg/sentry/fs/proc.uptime" +} + +func (u *uptime) StateFields() []string { + return []string{ + "SimpleFileInode", + "startTime", + } +} + +func (u *uptime) beforeSave() {} + +// +checklocksignore +func (u *uptime) StateSave(stateSinkObject state.Sink) { + u.beforeSave() + stateSinkObject.Save(0, &u.SimpleFileInode) + stateSinkObject.Save(1, &u.startTime) +} + +func (u *uptime) afterLoad() {} + +// +checklocksignore +func (u *uptime) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &u.SimpleFileInode) + stateSourceObject.Load(1, &u.startTime) +} + +func (f *uptimeFile) StateTypeName() string { + return "pkg/sentry/fs/proc.uptimeFile" +} + +func (f *uptimeFile) StateFields() []string { + return []string{ + "startTime", + } +} + +func (f *uptimeFile) beforeSave() {} + +// +checklocksignore +func (f *uptimeFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.startTime) +} + +func (f *uptimeFile) afterLoad() {} + +// +checklocksignore +func (f *uptimeFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.startTime) +} + +func (v *versionData) StateTypeName() string { + return "pkg/sentry/fs/proc.versionData" +} + +func (v *versionData) StateFields() []string { + return []string{ + "k", + } +} + +func (v *versionData) beforeSave() {} + +// +checklocksignore +func (v *versionData) StateSave(stateSinkObject state.Sink) { + v.beforeSave() + stateSinkObject.Save(0, &v.k) +} + +func (v *versionData) afterLoad() {} + +// +checklocksignore +func (v *versionData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &v.k) +} + +func init() { + state.Register((*execArgInode)(nil)) + state.Register((*execArgFile)(nil)) + state.Register((*fdDir)(nil)) + state.Register((*fdDirFile)(nil)) + state.Register((*fdInfoDir)(nil)) + state.Register((*filesystemsData)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*taskOwnedInodeOps)(nil)) + state.Register((*staticFileInodeOps)(nil)) + state.Register((*loadavgData)(nil)) + state.Register((*meminfoData)(nil)) + state.Register((*mountInfoFile)(nil)) + state.Register((*mountsFile)(nil)) + state.Register((*ifinet6)(nil)) + state.Register((*netDev)(nil)) + state.Register((*netSnmp)(nil)) + state.Register((*netRoute)(nil)) + state.Register((*netUnix)(nil)) + state.Register((*netTCP)(nil)) + state.Register((*netTCP6)(nil)) + state.Register((*netUDP)(nil)) + state.Register((*proc)(nil)) + state.Register((*self)(nil)) + state.Register((*threadSelf)(nil)) + state.Register((*rootProcFile)(nil)) + state.Register((*statData)(nil)) + state.Register((*mmapMinAddrData)(nil)) + state.Register((*overcommitMemory)(nil)) + state.Register((*maxMapCount)(nil)) + state.Register((*hostname)(nil)) + state.Register((*hostnameFile)(nil)) + state.Register((*tcpMemInode)(nil)) + state.Register((*tcpMemFile)(nil)) + state.Register((*tcpSack)(nil)) + state.Register((*tcpSackFile)(nil)) + state.Register((*tcpRecovery)(nil)) + state.Register((*tcpRecoveryFile)(nil)) + state.Register((*ipForwarding)(nil)) + state.Register((*ipForwardingFile)(nil)) + state.Register((*portRangeInode)(nil)) + state.Register((*portRangeFile)(nil)) + state.Register((*taskDir)(nil)) + state.Register((*subtasks)(nil)) + state.Register((*subtasksFile)(nil)) + state.Register((*exe)(nil)) + state.Register((*cwd)(nil)) + state.Register((*namespaceSymlink)(nil)) + state.Register((*memData)(nil)) + state.Register((*memDataFile)(nil)) + state.Register((*mapsData)(nil)) + state.Register((*smapsData)(nil)) + state.Register((*taskStatData)(nil)) + state.Register((*statmData)(nil)) + state.Register((*statusData)(nil)) + state.Register((*ioData)(nil)) + state.Register((*comm)(nil)) + state.Register((*commFile)(nil)) + state.Register((*auxvec)(nil)) + state.Register((*auxvecFile)(nil)) + state.Register((*oomScoreAdj)(nil)) + state.Register((*oomScoreAdjFile)(nil)) + state.Register((*idMapInodeOperations)(nil)) + state.Register((*idMapFileOperations)(nil)) + state.Register((*uptime)(nil)) + state.Register((*uptimeFile)(nil)) + state.Register((*versionData)(nil)) +} diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD deleted file mode 100644 index 90bd32345..000000000 --- a/pkg/sentry/fs/proc/seqfile/BUILD +++ /dev/null @@ -1,36 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "seqfile", - srcs = ["seqfile.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/proc/device", - "//pkg/sentry/kernel/time", - "//pkg/sync", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "seqfile_test", - size = "small", - srcs = ["seqfile_test.go"], - library = ":seqfile", - deps = [ - "//pkg/context", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/ramfs", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go b/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go new file mode 100644 index 000000000..1cd9b313b --- /dev/null +++ b/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go @@ -0,0 +1,106 @@ +// automatically generated by stateify. + +package seqfile + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *SeqData) StateTypeName() string { + return "pkg/sentry/fs/proc/seqfile.SeqData" +} + +func (s *SeqData) StateFields() []string { + return []string{ + "Buf", + "Handle", + } +} + +func (s *SeqData) beforeSave() {} + +// +checklocksignore +func (s *SeqData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Buf) + stateSinkObject.Save(1, &s.Handle) +} + +func (s *SeqData) afterLoad() {} + +// +checklocksignore +func (s *SeqData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Buf) + stateSourceObject.Load(1, &s.Handle) +} + +func (s *SeqFile) StateTypeName() string { + return "pkg/sentry/fs/proc/seqfile.SeqFile" +} + +func (s *SeqFile) StateFields() []string { + return []string{ + "InodeSimpleExtendedAttributes", + "InodeSimpleAttributes", + "SeqSource", + "source", + "generation", + "lastRead", + } +} + +func (s *SeqFile) beforeSave() {} + +// +checklocksignore +func (s *SeqFile) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeSimpleExtendedAttributes) + stateSinkObject.Save(1, &s.InodeSimpleAttributes) + stateSinkObject.Save(2, &s.SeqSource) + stateSinkObject.Save(3, &s.source) + stateSinkObject.Save(4, &s.generation) + stateSinkObject.Save(5, &s.lastRead) +} + +func (s *SeqFile) afterLoad() {} + +// +checklocksignore +func (s *SeqFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeSimpleExtendedAttributes) + stateSourceObject.Load(1, &s.InodeSimpleAttributes) + stateSourceObject.Load(2, &s.SeqSource) + stateSourceObject.Load(3, &s.source) + stateSourceObject.Load(4, &s.generation) + stateSourceObject.Load(5, &s.lastRead) +} + +func (sfo *seqFileOperations) StateTypeName() string { + return "pkg/sentry/fs/proc/seqfile.seqFileOperations" +} + +func (sfo *seqFileOperations) StateFields() []string { + return []string{ + "seqFile", + } +} + +func (sfo *seqFileOperations) beforeSave() {} + +// +checklocksignore +func (sfo *seqFileOperations) StateSave(stateSinkObject state.Sink) { + sfo.beforeSave() + stateSinkObject.Save(0, &sfo.seqFile) +} + +func (sfo *seqFileOperations) afterLoad() {} + +// +checklocksignore +func (sfo *seqFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sfo.seqFile) +} + +func init() { + state.Register((*SeqData)(nil)) + state.Register((*SeqFile)(nil)) + state.Register((*seqFileOperations)(nil)) +} diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_test.go b/pkg/sentry/fs/proc/seqfile/seqfile_test.go deleted file mode 100644 index 98e394569..000000000 --- a/pkg/sentry/fs/proc/seqfile/seqfile_test.go +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package seqfile - -import ( - "bytes" - "fmt" - "io" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/usermem" -) - -type seqTest struct { - actual []SeqData - update bool -} - -func (s *seqTest) Init() { - var sq []SeqData - // Create some SeqData. - for i := 0; i < 10; i++ { - var b []byte - for j := 0; j < 10; j++ { - b = append(b, byte(i)) - } - sq = append(sq, SeqData{ - Buf: b, - Handle: &testHandle{i: i}, - }) - } - s.actual = sq -} - -// NeedsUpdate reports whether we need to update the data we've previously read. -func (s *seqTest) NeedsUpdate(int64) bool { - return s.update -} - -// ReadSeqFiledata returns a slice of SeqData which contains elements -// greater than the handle. -func (s *seqTest) ReadSeqFileData(ctx context.Context, handle SeqHandle) ([]SeqData, int64) { - if handle == nil { - return s.actual, 0 - } - h := *handle.(*testHandle) - var ret []SeqData - for _, b := range s.actual { - // We want the next one. - h2 := *b.Handle.(*testHandle) - if h2.i > h.i { - ret = append(ret, b) - } - } - return ret, 0 -} - -// Flatten a slice of slices into one slice. -func flatten(buf ...[]byte) []byte { - var flat []byte - for _, b := range buf { - flat = append(flat, b...) - } - return flat -} - -type testHandle struct { - i int -} - -type testTable struct { - offset int64 - readBufferSize int - expectedData []byte - expectedError error -} - -func runTableTests(ctx context.Context, table []testTable, dirent *fs.Dirent) error { - for _, tt := range table { - file, err := dirent.Inode.InodeOperations.GetFile(ctx, dirent, fs.FileFlags{Read: true}) - if err != nil { - return fmt.Errorf("GetFile returned error: %v", err) - } - - data := make([]byte, tt.readBufferSize) - resultLen, err := file.Preadv(ctx, usermem.BytesIOSequence(data), tt.offset) - if err != tt.expectedError { - return fmt.Errorf("t.Preadv(len: %v, offset: %v) (error) => %v expected %v", tt.readBufferSize, tt.offset, err, tt.expectedError) - } - expectedLen := int64(len(tt.expectedData)) - if resultLen != expectedLen { - // We make this just an error so we wall through and print the data below. - return fmt.Errorf("t.Preadv(len: %v, offset: %v) (size) => %v expected %v", tt.readBufferSize, tt.offset, resultLen, expectedLen) - } - if !bytes.Equal(data[:expectedLen], tt.expectedData) { - return fmt.Errorf("t.Preadv(len: %v, offset: %v) (data) => %v expected %v", tt.readBufferSize, tt.offset, data[:expectedLen], tt.expectedData) - } - } - return nil -} - -func TestSeqFile(t *testing.T) { - testSource := &seqTest{} - testSource.Init() - - // Create a file that can be R/W. - ctx := contexttest.Context(t) - m := fs.NewPseudoMountSource(ctx) - contents := map[string]*fs.Inode{ - "foo": NewSeqFileInode(ctx, testSource, m), - } - root := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0777)) - - // How about opening it? - inode := fs.NewInode(ctx, root, m, fs.StableAttr{Type: fs.Directory}) - dirent2, err := root.Lookup(ctx, inode, "foo") - if err != nil { - t.Fatalf("failed to walk to foo for n2: %v", err) - } - n2 := dirent2.Inode.InodeOperations - file2, err := n2.GetFile(ctx, dirent2, fs.FileFlags{Read: true, Write: true}) - if err != nil { - t.Fatalf("GetFile returned error: %v", err) - } - - // Writing? - if _, err := file2.Writev(ctx, usermem.BytesIOSequence([]byte("test"))); err == nil { - t.Fatalf("managed to write to n2: %v", err) - } - - // How about reading? - dirent3, err := root.Lookup(ctx, inode, "foo") - if err != nil { - t.Fatalf("failed to walk to foo: %v", err) - } - n3 := dirent3.Inode.InodeOperations - if n2 != n3 { - t.Error("got n2 != n3, want same") - } - - testSource.update = true - - table := []testTable{ - // Read past the end. - {100, 4, []byte{}, io.EOF}, - {110, 4, []byte{}, io.EOF}, - {200, 4, []byte{}, io.EOF}, - // Read a truncated first line. - {0, 4, testSource.actual[0].Buf[:4], nil}, - // Read the whole first line. - {0, 10, testSource.actual[0].Buf, nil}, - // Read the whole first line + 5 bytes of second line. - {0, 15, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf[:5]), nil}, - // First 4 bytes of the second line. - {10, 4, testSource.actual[1].Buf[:4], nil}, - // Read the two first lines. - {0, 20, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf), nil}, - // Read three lines. - {0, 30, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf, testSource.actual[2].Buf), nil}, - // Read everything, but use a bigger buffer than necessary. - {0, 150, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf, testSource.actual[2].Buf, testSource.actual[3].Buf, testSource.actual[4].Buf, testSource.actual[5].Buf, testSource.actual[6].Buf, testSource.actual[7].Buf, testSource.actual[8].Buf, testSource.actual[9].Buf), nil}, - // Read the last 3 bytes. - {97, 10, testSource.actual[9].Buf[7:], nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed with testSource.update = %v : %v", testSource.update, err) - } - - // Disable updates and do it again. - testSource.update = false - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed with testSource.update = %v: %v", testSource.update, err) - } -} - -// Test that we behave correctly when the file is updated. -func TestSeqFileFileUpdated(t *testing.T) { - testSource := &seqTest{} - testSource.Init() - testSource.update = true - - // Create a file that can be R/W. - ctx := contexttest.Context(t) - m := fs.NewPseudoMountSource(ctx) - contents := map[string]*fs.Inode{ - "foo": NewSeqFileInode(ctx, testSource, m), - } - root := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0777)) - - // How about opening it? - inode := fs.NewInode(ctx, root, m, fs.StableAttr{Type: fs.Directory}) - dirent2, err := root.Lookup(ctx, inode, "foo") - if err != nil { - t.Fatalf("failed to walk to foo for dirent2: %v", err) - } - - table := []testTable{ - {0, 16, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf[:6]), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed: %v", err) - } - // Delete the first entry. - cut := testSource.actual[0].Buf - testSource.actual = testSource.actual[1:] - - table = []testTable{ - // Try reading buffer 0 with an offset. This will not delete the old data. - {1, 5, cut[1:6], nil}, - // Reset our file by reading at offset 0. - {0, 10, testSource.actual[0].Buf, nil}, - {16, 14, flatten(testSource.actual[1].Buf[6:], testSource.actual[2].Buf), nil}, - // Read the same data a second time. - {16, 14, flatten(testSource.actual[1].Buf[6:], testSource.actual[2].Buf), nil}, - // Read the following two lines. - {30, 20, flatten(testSource.actual[3].Buf, testSource.actual[4].Buf), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after removing first entry: %v", err) - } - - // Add a new duplicate line in the middle (6666...) - after := testSource.actual[5:] - testSource.actual = testSource.actual[:4] - // Note the list must be sorted. - testSource.actual = append(testSource.actual, after[0]) - testSource.actual = append(testSource.actual, after...) - - table = []testTable{ - {50, 20, flatten(testSource.actual[4].Buf, testSource.actual[5].Buf), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after adding middle entry: %v", err) - } - // This will be used in a later test. - oldTestData := testSource.actual - - // Delete everything. - testSource.actual = testSource.actual[:0] - table = []testTable{ - {20, 20, []byte{}, io.EOF}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after removing all entries: %v", err) - } - // Restore some of the data. - testSource.actual = oldTestData[:1] - table = []testTable{ - {6, 20, testSource.actual[0].Buf[6:], nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after adding first entry back: %v", err) - } - - // Re-extend the data - testSource.actual = oldTestData - table = []testTable{ - {30, 20, flatten(testSource.actual[3].Buf, testSource.actual[4].Buf), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after extending testSource: %v", err) - } -} diff --git a/pkg/sentry/fs/proc/sys_net_test.go b/pkg/sentry/fs/proc/sys_net_test.go deleted file mode 100644 index 6ef5738e7..000000000 --- a/pkg/sentry/fs/proc/sys_net_test.go +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/usermem" -) - -func TestQuerySendBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - s.TCPSendBufSize = inet.TCPBufferSize{100, 200, 300} - tmi := &tcpMemInode{s: s, dir: tcpWMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - buf := make([]byte, 100) - dst := usermem.BytesIOSequence(buf) - n, err := tmf.Read(ctx, nil, dst, 0) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - - if got, want := string(buf[:n]), "100\t200\t300\n"; got != want { - t.Fatalf("Bad string: got %v, want %v", got, want) - } -} - -func TestQueryRecvBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - s.TCPRecvBufSize = inet.TCPBufferSize{100, 200, 300} - tmi := &tcpMemInode{s: s, dir: tcpRMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - buf := make([]byte, 100) - dst := usermem.BytesIOSequence(buf) - n, err := tmf.Read(ctx, nil, dst, 0) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - - if got, want := string(buf[:n]), "100\t200\t300\n"; got != want { - t.Fatalf("Bad string: got %v, want %v", got, want) - } -} - -var cases = []struct { - str string - initial inet.TCPBufferSize - final inet.TCPBufferSize -}{ - { - str: "", - initial: inet.TCPBufferSize{1, 2, 3}, - final: inet.TCPBufferSize{1, 2, 3}, - }, - { - str: "100\n", - initial: inet.TCPBufferSize{1, 100, 200}, - final: inet.TCPBufferSize{100, 100, 200}, - }, - { - str: "100 200 300\n", - initial: inet.TCPBufferSize{1, 2, 3}, - final: inet.TCPBufferSize{100, 200, 300}, - }, -} - -func TestConfigureSendBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - for _, c := range cases { - s.TCPSendBufSize = c.initial - tmi := &tcpMemInode{s: s, dir: tcpWMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - // Write the values. - src := usermem.BytesIOSequence([]byte(c.str)) - if n, err := tmf.Write(ctx, nil, src, 0); n != int64(len(c.str)) || err != nil { - t.Errorf("Write, case = %q: got (%d, %v), wanted (%d, nil)", c.str, n, err, len(c.str)) - } - - // Read the values from the stack and check them. - if s.TCPSendBufSize != c.final { - t.Errorf("TCPSendBufferSize, case = %q: got %v, wanted %v", c.str, s.TCPSendBufSize, c.final) - } - } -} - -func TestConfigureRecvBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - for _, c := range cases { - s.TCPRecvBufSize = c.initial - tmi := &tcpMemInode{s: s, dir: tcpRMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - // Write the values. - src := usermem.BytesIOSequence([]byte(c.str)) - if n, err := tmf.Write(ctx, nil, src, 0); n != int64(len(c.str)) || err != nil { - t.Errorf("Write, case = %q: got (%d, %v), wanted (%d, nil)", c.str, n, err, len(c.str)) - } - - // Read the values from the stack and check them. - if s.TCPRecvBufSize != c.final { - t.Errorf("TCPRecvBufferSize, case = %q: got %v, wanted %v", c.str, s.TCPRecvBufSize, c.final) - } - } -} - -// TestIPForwarding tests the implementation of -// /proc/sys/net/ipv4/ip_forwarding -func TestIPForwarding(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - - var cases = []struct { - comment string - initial bool - str string - final bool - }{ - { - comment: `Forwarding is disabled; write 1 and enable forwarding`, - initial: false, - str: "1", - final: true, - }, - { - comment: `Forwarding is disabled; write 0 and disable forwarding`, - initial: false, - str: "0", - final: false, - }, - { - comment: `Forwarding is enabled; write 1 and enable forwarding`, - initial: true, - str: "1", - final: true, - }, - { - comment: `Forwarding is enabled; write 0 and disable forwarding`, - initial: true, - str: "0", - final: false, - }, - { - comment: `Forwarding is disabled; write 2404 and enable forwarding`, - initial: false, - str: "2404", - final: true, - }, - { - comment: `Forwarding is enabled; write 2404 and enable forwarding`, - initial: true, - str: "2404", - final: true, - }, - } - for _, c := range cases { - t.Run(c.comment, func(t *testing.T) { - s.IPForwarding = c.initial - ipf := &ipForwarding{stack: s} - file := &ipForwardingFile{ - stack: s, - ipf: ipf, - } - - // Write the values. - src := usermem.BytesIOSequence([]byte(c.str)) - if n, err := file.Write(ctx, nil, src, 0); n != int64(len(c.str)) || err != nil { - t.Errorf("file.Write(ctx, nil, %q, 0) = (%d, %v); want (%d, nil)", c.str, n, err, len(c.str)) - } - - // Read the values from the stack and check them. - if got, want := s.IPForwarding, c.final; got != want { - t.Errorf("s.IPForwarding incorrect; got: %v, want: %v", got, want) - } - - }) - } -} diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD deleted file mode 100644 index b46567cf8..000000000 --- a/pkg/sentry/fs/ramfs/BUILD +++ /dev/null @@ -1,39 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "ramfs", - srcs = [ - "dir.go", - "socket.go", - "symlink.go", - "tree.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/socket/unix/transport", - "//pkg/sync", - "//pkg/syserror", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "ramfs_test", - size = "small", - srcs = ["tree_test.go"], - library = ":ramfs", - deps = [ - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - ], -) diff --git a/pkg/sentry/fs/ramfs/ramfs_state_autogen.go b/pkg/sentry/fs/ramfs/ramfs_state_autogen.go new file mode 100644 index 000000000..53c836415 --- /dev/null +++ b/pkg/sentry/fs/ramfs/ramfs_state_autogen.go @@ -0,0 +1,182 @@ +// automatically generated by stateify. + +package ramfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (d *Dir) StateTypeName() string { + return "pkg/sentry/fs/ramfs.Dir" +} + +func (d *Dir) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeSimpleExtendedAttributes", + "children", + "dentryMap", + } +} + +func (d *Dir) beforeSave() {} + +// +checklocksignore +func (d *Dir) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.InodeSimpleAttributes) + stateSinkObject.Save(1, &d.InodeSimpleExtendedAttributes) + stateSinkObject.Save(2, &d.children) + stateSinkObject.Save(3, &d.dentryMap) +} + +func (d *Dir) afterLoad() {} + +// +checklocksignore +func (d *Dir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.InodeSimpleAttributes) + stateSourceObject.Load(1, &d.InodeSimpleExtendedAttributes) + stateSourceObject.Load(2, &d.children) + stateSourceObject.Load(3, &d.dentryMap) +} + +func (dfo *dirFileOperations) StateTypeName() string { + return "pkg/sentry/fs/ramfs.dirFileOperations" +} + +func (dfo *dirFileOperations) StateFields() []string { + return []string{ + "dirCursor", + "dir", + } +} + +func (dfo *dirFileOperations) beforeSave() {} + +// +checklocksignore +func (dfo *dirFileOperations) StateSave(stateSinkObject state.Sink) { + dfo.beforeSave() + stateSinkObject.Save(0, &dfo.dirCursor) + stateSinkObject.Save(1, &dfo.dir) +} + +func (dfo *dirFileOperations) afterLoad() {} + +// +checklocksignore +func (dfo *dirFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &dfo.dirCursor) + stateSourceObject.Load(1, &dfo.dir) +} + +func (s *Socket) StateTypeName() string { + return "pkg/sentry/fs/ramfs.Socket" +} + +func (s *Socket) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeSimpleExtendedAttributes", + "ep", + } +} + +func (s *Socket) beforeSave() {} + +// +checklocksignore +func (s *Socket) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeSimpleAttributes) + stateSinkObject.Save(1, &s.InodeSimpleExtendedAttributes) + stateSinkObject.Save(2, &s.ep) +} + +func (s *Socket) afterLoad() {} + +// +checklocksignore +func (s *Socket) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeSimpleAttributes) + stateSourceObject.Load(1, &s.InodeSimpleExtendedAttributes) + stateSourceObject.Load(2, &s.ep) +} + +func (s *socketFileOperations) StateTypeName() string { + return "pkg/sentry/fs/ramfs.socketFileOperations" +} + +func (s *socketFileOperations) StateFields() []string { + return []string{} +} + +func (s *socketFileOperations) beforeSave() {} + +// +checklocksignore +func (s *socketFileOperations) StateSave(stateSinkObject state.Sink) { + s.beforeSave() +} + +func (s *socketFileOperations) afterLoad() {} + +// +checklocksignore +func (s *socketFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func (s *Symlink) StateTypeName() string { + return "pkg/sentry/fs/ramfs.Symlink" +} + +func (s *Symlink) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeSimpleExtendedAttributes", + "Target", + } +} + +func (s *Symlink) beforeSave() {} + +// +checklocksignore +func (s *Symlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeSimpleAttributes) + stateSinkObject.Save(1, &s.InodeSimpleExtendedAttributes) + stateSinkObject.Save(2, &s.Target) +} + +func (s *Symlink) afterLoad() {} + +// +checklocksignore +func (s *Symlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeSimpleAttributes) + stateSourceObject.Load(1, &s.InodeSimpleExtendedAttributes) + stateSourceObject.Load(2, &s.Target) +} + +func (s *symlinkFileOperations) StateTypeName() string { + return "pkg/sentry/fs/ramfs.symlinkFileOperations" +} + +func (s *symlinkFileOperations) StateFields() []string { + return []string{} +} + +func (s *symlinkFileOperations) beforeSave() {} + +// +checklocksignore +func (s *symlinkFileOperations) StateSave(stateSinkObject state.Sink) { + s.beforeSave() +} + +func (s *symlinkFileOperations) afterLoad() {} + +// +checklocksignore +func (s *symlinkFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*Dir)(nil)) + state.Register((*dirFileOperations)(nil)) + state.Register((*Socket)(nil)) + state.Register((*socketFileOperations)(nil)) + state.Register((*Symlink)(nil)) + state.Register((*symlinkFileOperations)(nil)) +} diff --git a/pkg/sentry/fs/ramfs/tree_test.go b/pkg/sentry/fs/ramfs/tree_test.go deleted file mode 100644 index 3e0d1e07e..000000000 --- a/pkg/sentry/fs/ramfs/tree_test.go +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ramfs - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -func TestMakeDirectoryTree(t *testing.T) { - - for _, test := range []struct { - name string - subdirs []string - }{ - { - name: "abs paths", - subdirs: []string{ - "/tmp", - "/tmp/a/b", - "/tmp/a/c/d", - "/tmp/c", - "/proc", - "/dev/a/b", - "/tmp", - }, - }, - { - name: "rel paths", - subdirs: []string{ - "tmp", - "tmp/a/b", - "tmp/a/c/d", - "tmp/c", - "proc", - "dev/a/b", - "tmp", - }, - }, - } { - ctx := contexttest.Context(t) - mount := fs.NewPseudoMountSource(ctx) - tree, err := MakeDirectoryTree(ctx, mount, test.subdirs) - if err != nil { - t.Errorf("%s: failed to make ramfs tree, got error %v, want nil", test.name, err) - continue - } - - // Expect to be able to find each of the paths. - mm, err := fs.NewMountNamespace(ctx, tree) - if err != nil { - t.Errorf("%s: failed to create mount manager: %v", test.name, err) - continue - } - root := mm.Root() - defer mm.DecRef(ctx) - - for _, p := range test.subdirs { - maxTraversals := uint(0) - if _, err := mm.FindInode(ctx, root, nil, p, &maxTraversals); err != nil { - t.Errorf("%s: failed to find node %s: %v", test.name, p, err) - break - } - } - } -} diff --git a/pkg/sentry/fs/sys/BUILD b/pkg/sentry/fs/sys/BUILD deleted file mode 100644 index fdbc5f912..000000000 --- a/pkg/sentry/fs/sys/BUILD +++ /dev/null @@ -1,24 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "sys", - srcs = [ - "device.go", - "devices.go", - "fs.go", - "sys.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/hostarch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/kernel", - ], -) diff --git a/pkg/sentry/fs/sys/sys_state_autogen.go b/pkg/sentry/fs/sys/sys_state_autogen.go new file mode 100644 index 000000000..9c4f22243 --- /dev/null +++ b/pkg/sentry/fs/sys/sys_state_autogen.go @@ -0,0 +1,61 @@ +// automatically generated by stateify. + +package sys + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (c *cpunum) StateTypeName() string { + return "pkg/sentry/fs/sys.cpunum" +} + +func (c *cpunum) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeStaticFileGetter", + } +} + +func (c *cpunum) beforeSave() {} + +// +checklocksignore +func (c *cpunum) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.InodeSimpleAttributes) + stateSinkObject.Save(1, &c.InodeStaticFileGetter) +} + +func (c *cpunum) afterLoad() {} + +// +checklocksignore +func (c *cpunum) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.InodeSimpleAttributes) + stateSourceObject.Load(1, &c.InodeStaticFileGetter) +} + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/sys.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +// +checklocksignore +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +// +checklocksignore +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*cpunum)(nil)) + state.Register((*filesystem)(nil)) +} diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD deleted file mode 100644 index 0148b33cf..000000000 --- a/pkg/sentry/fs/timerfd/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "timerfd", - srcs = ["timerfd.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel/time", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/timerfd/timerfd_state_autogen.go b/pkg/sentry/fs/timerfd/timerfd_state_autogen.go new file mode 100644 index 000000000..822eecdd6 --- /dev/null +++ b/pkg/sentry/fs/timerfd/timerfd_state_autogen.go @@ -0,0 +1,42 @@ +// automatically generated by stateify. + +package timerfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (t *TimerOperations) StateTypeName() string { + return "pkg/sentry/fs/timerfd.TimerOperations" +} + +func (t *TimerOperations) StateFields() []string { + return []string{ + "timer", + "val", + } +} + +func (t *TimerOperations) beforeSave() {} + +// +checklocksignore +func (t *TimerOperations) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + if !state.IsZeroValue(&t.events) { + state.Failf("events is %#v, expected zero", &t.events) + } + stateSinkObject.Save(0, &t.timer) + stateSinkObject.Save(1, &t.val) +} + +func (t *TimerOperations) afterLoad() {} + +// +checklocksignore +func (t *TimerOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.timer) + stateSourceObject.Load(1, &t.val) +} + +func init() { + state.Register((*TimerOperations)(nil)) +} diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD deleted file mode 100644 index 511fffb43..000000000 --- a/pkg/sentry/fs/tmpfs/BUILD +++ /dev/null @@ -1,52 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "tmpfs", - srcs = [ - "device.go", - "file_regular.go", - "fs.go", - "inode_file.go", - "tmpfs.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/safemem", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/fsmetric", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/sync", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "tmpfs_test", - size = "small", - srcs = ["file_test.go"], - library = ":tmpfs", - deps = [ - "//pkg/context", - "//pkg/hostarch", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/contexttest", - "//pkg/sentry/usage", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/tmpfs/file_test.go b/pkg/sentry/fs/tmpfs/file_test.go deleted file mode 100644 index 1718f9372..000000000 --- a/pkg/sentry/fs/tmpfs/file_test.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "bytes" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/usermem" -) - -func newFileInode(ctx context.Context) *fs.Inode { - m := fs.NewCachingMountSource(ctx, &Filesystem{}, fs.MountSourceFlags{}) - iops := NewInMemoryFile(ctx, usage.Tmpfs, fs.WithCurrentTime(ctx, fs.UnstableAttr{})) - return fs.NewInode(ctx, iops, m, fs.StableAttr{ - DeviceID: tmpfsDevice.DeviceID(), - InodeID: tmpfsDevice.NextIno(), - BlockSize: hostarch.PageSize, - Type: fs.RegularFile, - }) -} - -func newFile(ctx context.Context) *fs.File { - inode := newFileInode(ctx) - f, _ := inode.GetFile(ctx, fs.NewDirent(ctx, inode, "stub"), fs.FileFlags{Read: true, Write: true}) - return f -} - -// Allocate once, write twice. -func TestGrow(t *testing.T) { - ctx := contexttest.Context(t) - f := newFile(ctx) - defer f.DecRef(ctx) - - abuf := bytes.Repeat([]byte{'a'}, 68) - n, err := f.Pwritev(ctx, usermem.BytesIOSequence(abuf), 0) - if n != int64(len(abuf)) || err != nil { - t.Fatalf("Pwritev got (%d, %v) want (%d, nil)", n, err, len(abuf)) - } - - bbuf := bytes.Repeat([]byte{'b'}, 856) - n, err = f.Pwritev(ctx, usermem.BytesIOSequence(bbuf), 68) - if n != int64(len(bbuf)) || err != nil { - t.Fatalf("Pwritev got (%d, %v) want (%d, nil)", n, err, len(bbuf)) - } - - rbuf := make([]byte, len(abuf)+len(bbuf)) - n, err = f.Preadv(ctx, usermem.BytesIOSequence(rbuf), 0) - if n != int64(len(rbuf)) || err != nil { - t.Fatalf("Preadv got (%d, %v) want (%d, nil)", n, err, len(rbuf)) - } - - if want := append(abuf, bbuf...); !bytes.Equal(rbuf, want) { - t.Fatalf("Read %v, want %v", rbuf, want) - } -} diff --git a/pkg/sentry/fs/tmpfs/tmpfs_state_autogen.go b/pkg/sentry/fs/tmpfs/tmpfs_state_autogen.go new file mode 100644 index 000000000..ab5f75fea --- /dev/null +++ b/pkg/sentry/fs/tmpfs/tmpfs_state_autogen.go @@ -0,0 +1,211 @@ +// automatically generated by stateify. + +package tmpfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *regularFileOperations) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.regularFileOperations" +} + +func (r *regularFileOperations) StateFields() []string { + return []string{ + "iops", + } +} + +func (r *regularFileOperations) beforeSave() {} + +// +checklocksignore +func (r *regularFileOperations) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.iops) +} + +func (r *regularFileOperations) afterLoad() {} + +// +checklocksignore +func (r *regularFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.iops) +} + +func (f *Filesystem) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.Filesystem" +} + +func (f *Filesystem) StateFields() []string { + return []string{} +} + +func (f *Filesystem) beforeSave() {} + +// +checklocksignore +func (f *Filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *Filesystem) afterLoad() {} + +// +checklocksignore +func (f *Filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (f *fileInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.fileInodeOperations" +} + +func (f *fileInodeOperations) StateFields() []string { + return []string{ + "InodeSimpleExtendedAttributes", + "kernel", + "memUsage", + "attr", + "mappings", + "writableMappingPages", + "data", + "seals", + } +} + +func (f *fileInodeOperations) beforeSave() {} + +// +checklocksignore +func (f *fileInodeOperations) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.InodeSimpleExtendedAttributes) + stateSinkObject.Save(1, &f.kernel) + stateSinkObject.Save(2, &f.memUsage) + stateSinkObject.Save(3, &f.attr) + stateSinkObject.Save(4, &f.mappings) + stateSinkObject.Save(5, &f.writableMappingPages) + stateSinkObject.Save(6, &f.data) + stateSinkObject.Save(7, &f.seals) +} + +func (f *fileInodeOperations) afterLoad() {} + +// +checklocksignore +func (f *fileInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.InodeSimpleExtendedAttributes) + stateSourceObject.Load(1, &f.kernel) + stateSourceObject.Load(2, &f.memUsage) + stateSourceObject.Load(3, &f.attr) + stateSourceObject.Load(4, &f.mappings) + stateSourceObject.Load(5, &f.writableMappingPages) + stateSourceObject.Load(6, &f.data) + stateSourceObject.Load(7, &f.seals) +} + +func (d *Dir) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.Dir" +} + +func (d *Dir) StateFields() []string { + return []string{ + "ramfsDir", + "kernel", + } +} + +func (d *Dir) beforeSave() {} + +// +checklocksignore +func (d *Dir) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.ramfsDir) + stateSinkObject.Save(1, &d.kernel) +} + +// +checklocksignore +func (d *Dir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.ramfsDir) + stateSourceObject.Load(1, &d.kernel) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (s *Symlink) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.Symlink" +} + +func (s *Symlink) StateFields() []string { + return []string{ + "Symlink", + } +} + +func (s *Symlink) beforeSave() {} + +// +checklocksignore +func (s *Symlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Symlink) +} + +func (s *Symlink) afterLoad() {} + +// +checklocksignore +func (s *Symlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Symlink) +} + +func (s *Socket) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.Socket" +} + +func (s *Socket) StateFields() []string { + return []string{ + "Socket", + } +} + +func (s *Socket) beforeSave() {} + +// +checklocksignore +func (s *Socket) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Socket) +} + +func (s *Socket) afterLoad() {} + +// +checklocksignore +func (s *Socket) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Socket) +} + +func (f *Fifo) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.Fifo" +} + +func (f *Fifo) StateFields() []string { + return []string{ + "InodeOperations", + } +} + +func (f *Fifo) beforeSave() {} + +// +checklocksignore +func (f *Fifo) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.InodeOperations) +} + +func (f *Fifo) afterLoad() {} + +// +checklocksignore +func (f *Fifo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.InodeOperations) +} + +func init() { + state.Register((*regularFileOperations)(nil)) + state.Register((*Filesystem)(nil)) + state.Register((*fileInodeOperations)(nil)) + state.Register((*Dir)(nil)) + state.Register((*Symlink)(nil)) + state.Register((*Socket)(nil)) + state.Register((*Fifo)(nil)) +} diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD deleted file mode 100644 index 5933cb67b..000000000 --- a/pkg/sentry/fs/tty/BUILD +++ /dev/null @@ -1,50 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "tty", - srcs = [ - "dir.go", - "fs.go", - "line_discipline.go", - "master.go", - "queue.go", - "replica.go", - "terminal.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/marshal/primitive", - "//pkg/refs", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/unimpl", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "tty_test", - size = "small", - srcs = ["tty_test.go"], - library = ":tty", - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/contexttest", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/tty/tty_state_autogen.go b/pkg/sentry/fs/tty/tty_state_autogen.go new file mode 100644 index 000000000..2fb0a8d27 --- /dev/null +++ b/pkg/sentry/fs/tty/tty_state_autogen.go @@ -0,0 +1,407 @@ +// automatically generated by stateify. + +package tty + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (d *dirInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.dirInodeOperations" +} + +func (d *dirInodeOperations) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "msrc", + "master", + "replicas", + "dentryMap", + "next", + } +} + +func (d *dirInodeOperations) beforeSave() {} + +// +checklocksignore +func (d *dirInodeOperations) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.InodeSimpleAttributes) + stateSinkObject.Save(1, &d.msrc) + stateSinkObject.Save(2, &d.master) + stateSinkObject.Save(3, &d.replicas) + stateSinkObject.Save(4, &d.dentryMap) + stateSinkObject.Save(5, &d.next) +} + +func (d *dirInodeOperations) afterLoad() {} + +// +checklocksignore +func (d *dirInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.InodeSimpleAttributes) + stateSourceObject.Load(1, &d.msrc) + stateSourceObject.Load(2, &d.master) + stateSourceObject.Load(3, &d.replicas) + stateSourceObject.Load(4, &d.dentryMap) + stateSourceObject.Load(5, &d.next) +} + +func (df *dirFileOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.dirFileOperations" +} + +func (df *dirFileOperations) StateFields() []string { + return []string{ + "di", + "dirCursor", + } +} + +func (df *dirFileOperations) beforeSave() {} + +// +checklocksignore +func (df *dirFileOperations) StateSave(stateSinkObject state.Sink) { + df.beforeSave() + stateSinkObject.Save(0, &df.di) + stateSinkObject.Save(1, &df.dirCursor) +} + +func (df *dirFileOperations) afterLoad() {} + +// +checklocksignore +func (df *dirFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &df.di) + stateSourceObject.Load(1, &df.dirCursor) +} + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/tty.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +// +checklocksignore +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +// +checklocksignore +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (s *superOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.superOperations" +} + +func (s *superOperations) StateFields() []string { + return []string{} +} + +func (s *superOperations) beforeSave() {} + +// +checklocksignore +func (s *superOperations) StateSave(stateSinkObject state.Sink) { + s.beforeSave() +} + +func (s *superOperations) afterLoad() {} + +// +checklocksignore +func (s *superOperations) StateLoad(stateSourceObject state.Source) { +} + +func (l *lineDiscipline) StateTypeName() string { + return "pkg/sentry/fs/tty.lineDiscipline" +} + +func (l *lineDiscipline) StateFields() []string { + return []string{ + "size", + "inQueue", + "outQueue", + "termios", + "column", + } +} + +func (l *lineDiscipline) beforeSave() {} + +// +checklocksignore +func (l *lineDiscipline) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + if !state.IsZeroValue(&l.masterWaiter) { + state.Failf("masterWaiter is %#v, expected zero", &l.masterWaiter) + } + if !state.IsZeroValue(&l.replicaWaiter) { + state.Failf("replicaWaiter is %#v, expected zero", &l.replicaWaiter) + } + stateSinkObject.Save(0, &l.size) + stateSinkObject.Save(1, &l.inQueue) + stateSinkObject.Save(2, &l.outQueue) + stateSinkObject.Save(3, &l.termios) + stateSinkObject.Save(4, &l.column) +} + +func (l *lineDiscipline) afterLoad() {} + +// +checklocksignore +func (l *lineDiscipline) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.size) + stateSourceObject.Load(1, &l.inQueue) + stateSourceObject.Load(2, &l.outQueue) + stateSourceObject.Load(3, &l.termios) + stateSourceObject.Load(4, &l.column) +} + +func (o *outputQueueTransformer) StateTypeName() string { + return "pkg/sentry/fs/tty.outputQueueTransformer" +} + +func (o *outputQueueTransformer) StateFields() []string { + return []string{} +} + +func (o *outputQueueTransformer) beforeSave() {} + +// +checklocksignore +func (o *outputQueueTransformer) StateSave(stateSinkObject state.Sink) { + o.beforeSave() +} + +func (o *outputQueueTransformer) afterLoad() {} + +// +checklocksignore +func (o *outputQueueTransformer) StateLoad(stateSourceObject state.Source) { +} + +func (i *inputQueueTransformer) StateTypeName() string { + return "pkg/sentry/fs/tty.inputQueueTransformer" +} + +func (i *inputQueueTransformer) StateFields() []string { + return []string{} +} + +func (i *inputQueueTransformer) beforeSave() {} + +// +checklocksignore +func (i *inputQueueTransformer) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *inputQueueTransformer) afterLoad() {} + +// +checklocksignore +func (i *inputQueueTransformer) StateLoad(stateSourceObject state.Source) { +} + +func (mi *masterInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.masterInodeOperations" +} + +func (mi *masterInodeOperations) StateFields() []string { + return []string{ + "SimpleFileInode", + "d", + } +} + +func (mi *masterInodeOperations) beforeSave() {} + +// +checklocksignore +func (mi *masterInodeOperations) StateSave(stateSinkObject state.Sink) { + mi.beforeSave() + stateSinkObject.Save(0, &mi.SimpleFileInode) + stateSinkObject.Save(1, &mi.d) +} + +func (mi *masterInodeOperations) afterLoad() {} + +// +checklocksignore +func (mi *masterInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mi.SimpleFileInode) + stateSourceObject.Load(1, &mi.d) +} + +func (mf *masterFileOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.masterFileOperations" +} + +func (mf *masterFileOperations) StateFields() []string { + return []string{ + "d", + "t", + } +} + +func (mf *masterFileOperations) beforeSave() {} + +// +checklocksignore +func (mf *masterFileOperations) StateSave(stateSinkObject state.Sink) { + mf.beforeSave() + stateSinkObject.Save(0, &mf.d) + stateSinkObject.Save(1, &mf.t) +} + +func (mf *masterFileOperations) afterLoad() {} + +// +checklocksignore +func (mf *masterFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mf.d) + stateSourceObject.Load(1, &mf.t) +} + +func (q *queue) StateTypeName() string { + return "pkg/sentry/fs/tty.queue" +} + +func (q *queue) StateFields() []string { + return []string{ + "readBuf", + "waitBuf", + "waitBufLen", + "readable", + "transformer", + } +} + +func (q *queue) beforeSave() {} + +// +checklocksignore +func (q *queue) StateSave(stateSinkObject state.Sink) { + q.beforeSave() + stateSinkObject.Save(0, &q.readBuf) + stateSinkObject.Save(1, &q.waitBuf) + stateSinkObject.Save(2, &q.waitBufLen) + stateSinkObject.Save(3, &q.readable) + stateSinkObject.Save(4, &q.transformer) +} + +func (q *queue) afterLoad() {} + +// +checklocksignore +func (q *queue) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &q.readBuf) + stateSourceObject.Load(1, &q.waitBuf) + stateSourceObject.Load(2, &q.waitBufLen) + stateSourceObject.Load(3, &q.readable) + stateSourceObject.Load(4, &q.transformer) +} + +func (si *replicaInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.replicaInodeOperations" +} + +func (si *replicaInodeOperations) StateFields() []string { + return []string{ + "SimpleFileInode", + "d", + "t", + } +} + +func (si *replicaInodeOperations) beforeSave() {} + +// +checklocksignore +func (si *replicaInodeOperations) StateSave(stateSinkObject state.Sink) { + si.beforeSave() + stateSinkObject.Save(0, &si.SimpleFileInode) + stateSinkObject.Save(1, &si.d) + stateSinkObject.Save(2, &si.t) +} + +func (si *replicaInodeOperations) afterLoad() {} + +// +checklocksignore +func (si *replicaInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &si.SimpleFileInode) + stateSourceObject.Load(1, &si.d) + stateSourceObject.Load(2, &si.t) +} + +func (sf *replicaFileOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.replicaFileOperations" +} + +func (sf *replicaFileOperations) StateFields() []string { + return []string{ + "si", + } +} + +func (sf *replicaFileOperations) beforeSave() {} + +// +checklocksignore +func (sf *replicaFileOperations) StateSave(stateSinkObject state.Sink) { + sf.beforeSave() + stateSinkObject.Save(0, &sf.si) +} + +func (sf *replicaFileOperations) afterLoad() {} + +// +checklocksignore +func (sf *replicaFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sf.si) +} + +func (tm *Terminal) StateTypeName() string { + return "pkg/sentry/fs/tty.Terminal" +} + +func (tm *Terminal) StateFields() []string { + return []string{ + "AtomicRefCount", + "n", + "d", + "ld", + "masterKTTY", + "replicaKTTY", + } +} + +func (tm *Terminal) beforeSave() {} + +// +checklocksignore +func (tm *Terminal) StateSave(stateSinkObject state.Sink) { + tm.beforeSave() + stateSinkObject.Save(0, &tm.AtomicRefCount) + stateSinkObject.Save(1, &tm.n) + stateSinkObject.Save(2, &tm.d) + stateSinkObject.Save(3, &tm.ld) + stateSinkObject.Save(4, &tm.masterKTTY) + stateSinkObject.Save(5, &tm.replicaKTTY) +} + +func (tm *Terminal) afterLoad() {} + +// +checklocksignore +func (tm *Terminal) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tm.AtomicRefCount) + stateSourceObject.Load(1, &tm.n) + stateSourceObject.Load(2, &tm.d) + stateSourceObject.Load(3, &tm.ld) + stateSourceObject.Load(4, &tm.masterKTTY) + stateSourceObject.Load(5, &tm.replicaKTTY) +} + +func init() { + state.Register((*dirInodeOperations)(nil)) + state.Register((*dirFileOperations)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*superOperations)(nil)) + state.Register((*lineDiscipline)(nil)) + state.Register((*outputQueueTransformer)(nil)) + state.Register((*inputQueueTransformer)(nil)) + state.Register((*masterInodeOperations)(nil)) + state.Register((*masterFileOperations)(nil)) + state.Register((*queue)(nil)) + state.Register((*replicaInodeOperations)(nil)) + state.Register((*replicaFileOperations)(nil)) + state.Register((*Terminal)(nil)) +} diff --git a/pkg/sentry/fs/tty/tty_test.go b/pkg/sentry/fs/tty/tty_test.go deleted file mode 100644 index 49edee83d..000000000 --- a/pkg/sentry/fs/tty/tty_test.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tty - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/usermem" -) - -func TestSimpleMasterToReplica(t *testing.T) { - ld := newLineDiscipline(linux.DefaultReplicaTermios) - ctx := contexttest.Context(t) - inBytes := []byte("hello, tty\n") - src := usermem.BytesIOSequence(inBytes) - outBytes := make([]byte, 32) - dst := usermem.BytesIOSequence(outBytes) - - // Write to the input queue. - nw, err := ld.inputQueueWrite(ctx, src) - if err != nil { - t.Fatalf("error writing to input queue: %v", err) - } - if nw != int64(len(inBytes)) { - t.Fatalf("wrote wrong length: got %d, want %d", nw, len(inBytes)) - } - - // Read from the input queue. - nr, err := ld.inputQueueRead(ctx, dst) - if err != nil { - t.Fatalf("error reading from input queue: %v", err) - } - if nr != int64(len(inBytes)) { - t.Fatalf("read wrong length: got %d, want %d", nr, len(inBytes)) - } - - outStr := string(outBytes[:nr]) - inStr := string(inBytes) - if outStr != inStr { - t.Fatalf("written and read strings do not match: got %q, want %q", outStr, inStr) - } -} diff --git a/pkg/sentry/fs/user/BUILD b/pkg/sentry/fs/user/BUILD deleted file mode 100644 index 4acc73ee0..000000000 --- a/pkg/sentry/fs/user/BUILD +++ /dev/null @@ -1,41 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "user", - srcs = [ - "path.go", - "user.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/log", - "//pkg/sentry/fs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - ], -) - -go_test( - name = "user_test", - size = "small", - srcs = ["user_test.go"], - library = ":user", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/contexttest", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/user/user_state_autogen.go b/pkg/sentry/fs/user/user_state_autogen.go new file mode 100644 index 000000000..8083e036c --- /dev/null +++ b/pkg/sentry/fs/user/user_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package user diff --git a/pkg/sentry/fs/user/user_test.go b/pkg/sentry/fs/user/user_test.go deleted file mode 100644 index 7f8fa8038..000000000 --- a/pkg/sentry/fs/user/user_test.go +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package user - -import ( - "fmt" - "strings" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/usermem" -) - -// createEtcPasswd creates /etc/passwd with the given contents and mode. If -// mode is empty, then no file will be created. If mode is not a regular file -// mode, then contents is ignored. -func createEtcPasswd(ctx context.Context, root *fs.Dirent, contents string, mode linux.FileMode) error { - if err := root.CreateDirectory(ctx, root, "etc", fs.FilePermsFromMode(0755)); err != nil { - return err - } - etc, err := root.Walk(ctx, root, "etc") - if err != nil { - return err - } - defer etc.DecRef(ctx) - switch mode.FileType() { - case 0: - // Don't create anything. - return nil - case linux.S_IFREG: - passwd, err := etc.Create(ctx, root, "passwd", fs.FileFlags{Write: true}, fs.FilePermsFromMode(mode)) - if err != nil { - return err - } - defer passwd.DecRef(ctx) - if _, err := passwd.Writev(ctx, usermem.BytesIOSequence([]byte(contents))); err != nil { - return err - } - return nil - case linux.S_IFDIR: - return etc.CreateDirectory(ctx, root, "passwd", fs.FilePermsFromMode(mode)) - case linux.S_IFIFO: - return etc.CreateFifo(ctx, root, "passwd", fs.FilePermsFromMode(mode)) - default: - return fmt.Errorf("unknown file type %x", mode.FileType()) - } -} - -// TestGetExecUserHome tests the getExecUserHome function. -func TestGetExecUserHome(t *testing.T) { - tests := map[string]struct { - uid auth.KUID - passwdContents string - passwdMode linux.FileMode - expected string - }{ - "success": { - uid: 1000, - passwdContents: "adin::1000:1111::/home/adin:/bin/sh", - passwdMode: linux.S_IFREG | 0666, - expected: "/home/adin", - }, - "no_perms": { - uid: 1000, - passwdContents: "adin::1000:1111::/home/adin:/bin/sh", - passwdMode: linux.S_IFREG, - expected: "/", - }, - "no_passwd": { - uid: 1000, - expected: "/", - }, - "directory": { - uid: 1000, - passwdMode: linux.S_IFDIR | 0666, - expected: "/", - }, - // Currently we don't allow named pipes. - "named_pipe": { - uid: 1000, - passwdMode: linux.S_IFIFO | 0666, - expected: "/", - }, - } - - for name, tc := range tests { - t.Run(name, func(t *testing.T) { - ctx := contexttest.Context(t) - msrc := fs.NewPseudoMountSource(ctx) - rootInode, err := tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc, nil /* parent */) - if err != nil { - t.Fatalf("tmpfs.NewDir failed: %v", err) - } - - mns, err := fs.NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - defer mns.DecRef(ctx) - root := mns.Root() - defer root.DecRef(ctx) - ctx = fs.WithRoot(ctx, root) - - if err := createEtcPasswd(ctx, root, tc.passwdContents, tc.passwdMode); err != nil { - t.Fatalf("createEtcPasswd failed: %v", err) - } - - got, err := getExecUserHome(ctx, mns, tc.uid) - if err != nil { - t.Fatalf("failed to get user home: %v", err) - } - - if got != tc.expected { - t.Fatalf("expected %v, got: %v", tc.expected, got) - } - }) - } -} - -// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing. -func TestFindHomeInPasswd(t *testing.T) { - tests := map[string]struct { - uid uint32 - passwd string - expected string - def string - }{ - "empty": { - uid: 1000, - passwd: "", - expected: "/", - def: "/", - }, - "whitespace": { - uid: 1000, - passwd: " ", - expected: "/", - def: "/", - }, - "full": { - uid: 1000, - passwd: "adin::1000:1111::/home/adin:/bin/sh", - expected: "/home/adin", - def: "/", - }, - // For better or worse, this is how runc works. - "partial": { - uid: 1000, - passwd: "adin::1000:1111:", - expected: "", - def: "/", - }, - "multiple": { - uid: 1001, - passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh", - expected: "/home/ian", - def: "/", - }, - "duplicate": { - uid: 1000, - passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh", - expected: "/home/adin", - def: "/", - }, - "empty_lines": { - uid: 1001, - passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh", - expected: "/home/ian", - def: "/", - }, - } - - for name, tc := range tests { - t.Run(name, func(t *testing.T) { - got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def) - if err != nil { - t.Fatalf("error parsing passwd: %v", err) - } - if tc.expected != got { - t.Fatalf("expected %v, got: %v", tc.expected, got) - } - }) - } -} diff --git a/pkg/sentry/fsbridge/BUILD b/pkg/sentry/fsbridge/BUILD deleted file mode 100644 index 4631db2bb..000000000 --- a/pkg/sentry/fsbridge/BUILD +++ /dev/null @@ -1,24 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "fsbridge", - srcs = [ - "bridge.go", - "fs.go", - "vfs.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/vfs", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fsbridge/fsbridge_state_autogen.go b/pkg/sentry/fsbridge/fsbridge_state_autogen.go new file mode 100644 index 000000000..b4b240bfa --- /dev/null +++ b/pkg/sentry/fsbridge/fsbridge_state_autogen.go @@ -0,0 +1,126 @@ +// automatically generated by stateify. + +package fsbridge + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (f *fsFile) StateTypeName() string { + return "pkg/sentry/fsbridge.fsFile" +} + +func (f *fsFile) StateFields() []string { + return []string{ + "file", + } +} + +func (f *fsFile) beforeSave() {} + +// +checklocksignore +func (f *fsFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.file) +} + +func (f *fsFile) afterLoad() {} + +// +checklocksignore +func (f *fsFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.file) +} + +func (l *fsLookup) StateTypeName() string { + return "pkg/sentry/fsbridge.fsLookup" +} + +func (l *fsLookup) StateFields() []string { + return []string{ + "mntns", + "root", + "workingDir", + } +} + +func (l *fsLookup) beforeSave() {} + +// +checklocksignore +func (l *fsLookup) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.mntns) + stateSinkObject.Save(1, &l.root) + stateSinkObject.Save(2, &l.workingDir) +} + +func (l *fsLookup) afterLoad() {} + +// +checklocksignore +func (l *fsLookup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.mntns) + stateSourceObject.Load(1, &l.root) + stateSourceObject.Load(2, &l.workingDir) +} + +func (f *VFSFile) StateTypeName() string { + return "pkg/sentry/fsbridge.VFSFile" +} + +func (f *VFSFile) StateFields() []string { + return []string{ + "file", + } +} + +func (f *VFSFile) beforeSave() {} + +// +checklocksignore +func (f *VFSFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.file) +} + +func (f *VFSFile) afterLoad() {} + +// +checklocksignore +func (f *VFSFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.file) +} + +func (l *vfsLookup) StateTypeName() string { + return "pkg/sentry/fsbridge.vfsLookup" +} + +func (l *vfsLookup) StateFields() []string { + return []string{ + "mntns", + "root", + "workingDir", + } +} + +func (l *vfsLookup) beforeSave() {} + +// +checklocksignore +func (l *vfsLookup) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.mntns) + stateSinkObject.Save(1, &l.root) + stateSinkObject.Save(2, &l.workingDir) +} + +func (l *vfsLookup) afterLoad() {} + +// +checklocksignore +func (l *vfsLookup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.mntns) + stateSourceObject.Load(1, &l.root) + stateSourceObject.Load(2, &l.workingDir) +} + +func init() { + state.Register((*fsFile)(nil)) + state.Register((*fsLookup)(nil)) + state.Register((*VFSFile)(nil)) + state.Register((*vfsLookup)(nil)) +} diff --git a/pkg/sentry/fsimpl/cgroupfs/BUILD b/pkg/sentry/fsimpl/cgroupfs/BUILD deleted file mode 100644 index 4c9c5b344..000000000 --- a/pkg/sentry/fsimpl/cgroupfs/BUILD +++ /dev/null @@ -1,49 +0,0 @@ -load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "dir_refs", - out = "dir_refs.go", - package = "cgroupfs", - prefix = "dir", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "dir", - }, -) - -go_library( - name = "cgroupfs", - srcs = [ - "base.go", - "cgroupfs.go", - "cpu.go", - "cpuacct.go", - "cpuset.go", - "dir_refs.go", - "job.go", - "memory.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/coverage", - "//pkg/errors/linuxerr", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/sentry/arch", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/usage", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go new file mode 100644 index 000000000..3142ab6f8 --- /dev/null +++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go @@ -0,0 +1,687 @@ +// automatically generated by stateify. + +package cgroupfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (c *controllerCommon) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.controllerCommon" +} + +func (c *controllerCommon) StateFields() []string { + return []string{ + "ty", + "fs", + } +} + +func (c *controllerCommon) beforeSave() {} + +// +checklocksignore +func (c *controllerCommon) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.ty) + stateSinkObject.Save(1, &c.fs) +} + +func (c *controllerCommon) afterLoad() {} + +// +checklocksignore +func (c *controllerCommon) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.ty) + stateSourceObject.Load(1, &c.fs) +} + +func (c *cgroupInode) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.cgroupInode" +} + +func (c *cgroupInode) StateFields() []string { + return []string{ + "dir", + "fs", + "ts", + } +} + +func (c *cgroupInode) beforeSave() {} + +// +checklocksignore +func (c *cgroupInode) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.dir) + stateSinkObject.Save(1, &c.fs) + stateSinkObject.Save(2, &c.ts) +} + +func (c *cgroupInode) afterLoad() {} + +// +checklocksignore +func (c *cgroupInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.dir) + stateSourceObject.Load(1, &c.fs) + stateSourceObject.Load(2, &c.ts) +} + +func (d *cgroupProcsData) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.cgroupProcsData" +} + +func (d *cgroupProcsData) StateFields() []string { + return []string{ + "cgroupInode", + } +} + +func (d *cgroupProcsData) beforeSave() {} + +// +checklocksignore +func (d *cgroupProcsData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.cgroupInode) +} + +func (d *cgroupProcsData) afterLoad() {} + +// +checklocksignore +func (d *cgroupProcsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.cgroupInode) +} + +func (d *tasksData) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.tasksData" +} + +func (d *tasksData) StateFields() []string { + return []string{ + "cgroupInode", + } +} + +func (d *tasksData) beforeSave() {} + +// +checklocksignore +func (d *tasksData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.cgroupInode) +} + +func (d *tasksData) afterLoad() {} + +// +checklocksignore +func (d *tasksData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.cgroupInode) +} + +func (fsType *FilesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.FilesystemType" +} + +func (fsType *FilesystemType) StateFields() []string { + return []string{} +} + +func (fsType *FilesystemType) beforeSave() {} + +// +checklocksignore +func (fsType *FilesystemType) StateSave(stateSinkObject state.Sink) { + fsType.beforeSave() +} + +func (fsType *FilesystemType) afterLoad() {} + +// +checklocksignore +func (fsType *FilesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (i *InternalData) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.InternalData" +} + +func (i *InternalData) StateFields() []string { + return []string{ + "DefaultControlValues", + } +} + +func (i *InternalData) beforeSave() {} + +// +checklocksignore +func (i *InternalData) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.DefaultControlValues) +} + +func (i *InternalData) afterLoad() {} + +// +checklocksignore +func (i *InternalData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.DefaultControlValues) +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "Filesystem", + "devMinor", + "hierarchyID", + "controllers", + "kcontrollers", + "numCgroups", + "root", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.Filesystem) + stateSinkObject.Save(1, &fs.devMinor) + stateSinkObject.Save(2, &fs.hierarchyID) + stateSinkObject.Save(3, &fs.controllers) + stateSinkObject.Save(4, &fs.kcontrollers) + stateSinkObject.Save(5, &fs.numCgroups) + stateSinkObject.Save(6, &fs.root) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.Filesystem) + stateSourceObject.Load(1, &fs.devMinor) + stateSourceObject.Load(2, &fs.hierarchyID) + stateSourceObject.Load(3, &fs.controllers) + stateSourceObject.Load(4, &fs.kcontrollers) + stateSourceObject.Load(5, &fs.numCgroups) + stateSourceObject.Load(6, &fs.root) +} + +func (i *implStatFS) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.implStatFS" +} + +func (i *implStatFS) StateFields() []string { + return []string{} +} + +func (i *implStatFS) beforeSave() {} + +// +checklocksignore +func (i *implStatFS) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *implStatFS) afterLoad() {} + +// +checklocksignore +func (i *implStatFS) StateLoad(stateSourceObject state.Source) { +} + +func (d *dir) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.dir" +} + +func (d *dir) StateFields() []string { + return []string{ + "dirRefs", + "InodeAlwaysValid", + "InodeAttrs", + "InodeNotSymlink", + "InodeDirectoryNoNewChildren", + "OrderedChildren", + "implStatFS", + "locks", + } +} + +func (d *dir) beforeSave() {} + +// +checklocksignore +func (d *dir) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.dirRefs) + stateSinkObject.Save(1, &d.InodeAlwaysValid) + stateSinkObject.Save(2, &d.InodeAttrs) + stateSinkObject.Save(3, &d.InodeNotSymlink) + stateSinkObject.Save(4, &d.InodeDirectoryNoNewChildren) + stateSinkObject.Save(5, &d.OrderedChildren) + stateSinkObject.Save(6, &d.implStatFS) + stateSinkObject.Save(7, &d.locks) +} + +func (d *dir) afterLoad() {} + +// +checklocksignore +func (d *dir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.dirRefs) + stateSourceObject.Load(1, &d.InodeAlwaysValid) + stateSourceObject.Load(2, &d.InodeAttrs) + stateSourceObject.Load(3, &d.InodeNotSymlink) + stateSourceObject.Load(4, &d.InodeDirectoryNoNewChildren) + stateSourceObject.Load(5, &d.OrderedChildren) + stateSourceObject.Load(6, &d.implStatFS) + stateSourceObject.Load(7, &d.locks) +} + +func (c *controllerFile) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.controllerFile" +} + +func (c *controllerFile) StateFields() []string { + return []string{ + "DynamicBytesFile", + } +} + +func (c *controllerFile) beforeSave() {} + +// +checklocksignore +func (c *controllerFile) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.DynamicBytesFile) +} + +func (c *controllerFile) afterLoad() {} + +// +checklocksignore +func (c *controllerFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.DynamicBytesFile) +} + +func (s *staticControllerFile) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.staticControllerFile" +} + +func (s *staticControllerFile) StateFields() []string { + return []string{ + "DynamicBytesFile", + "StaticData", + } +} + +func (s *staticControllerFile) beforeSave() {} + +// +checklocksignore +func (s *staticControllerFile) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.DynamicBytesFile) + stateSinkObject.Save(1, &s.StaticData) +} + +func (s *staticControllerFile) afterLoad() {} + +// +checklocksignore +func (s *staticControllerFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.DynamicBytesFile) + stateSourceObject.Load(1, &s.StaticData) +} + +func (c *cpuController) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.cpuController" +} + +func (c *cpuController) StateFields() []string { + return []string{ + "controllerCommon", + "cfsPeriod", + "cfsQuota", + "shares", + } +} + +func (c *cpuController) beforeSave() {} + +// +checklocksignore +func (c *cpuController) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.controllerCommon) + stateSinkObject.Save(1, &c.cfsPeriod) + stateSinkObject.Save(2, &c.cfsQuota) + stateSinkObject.Save(3, &c.shares) +} + +func (c *cpuController) afterLoad() {} + +// +checklocksignore +func (c *cpuController) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.controllerCommon) + stateSourceObject.Load(1, &c.cfsPeriod) + stateSourceObject.Load(2, &c.cfsQuota) + stateSourceObject.Load(3, &c.shares) +} + +func (c *cpuacctController) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.cpuacctController" +} + +func (c *cpuacctController) StateFields() []string { + return []string{ + "controllerCommon", + } +} + +func (c *cpuacctController) beforeSave() {} + +// +checklocksignore +func (c *cpuacctController) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.controllerCommon) +} + +func (c *cpuacctController) afterLoad() {} + +// +checklocksignore +func (c *cpuacctController) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.controllerCommon) +} + +func (c *cpuacctCgroup) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.cpuacctCgroup" +} + +func (c *cpuacctCgroup) StateFields() []string { + return []string{ + "cgroupInode", + } +} + +func (c *cpuacctCgroup) beforeSave() {} + +// +checklocksignore +func (c *cpuacctCgroup) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.cgroupInode) +} + +func (c *cpuacctCgroup) afterLoad() {} + +// +checklocksignore +func (c *cpuacctCgroup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.cgroupInode) +} + +func (d *cpuacctStatData) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.cpuacctStatData" +} + +func (d *cpuacctStatData) StateFields() []string { + return []string{ + "cpuacctCgroup", + } +} + +func (d *cpuacctStatData) beforeSave() {} + +// +checklocksignore +func (d *cpuacctStatData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.cpuacctCgroup) +} + +func (d *cpuacctStatData) afterLoad() {} + +// +checklocksignore +func (d *cpuacctStatData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.cpuacctCgroup) +} + +func (d *cpuacctUsageData) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.cpuacctUsageData" +} + +func (d *cpuacctUsageData) StateFields() []string { + return []string{ + "cpuacctCgroup", + } +} + +func (d *cpuacctUsageData) beforeSave() {} + +// +checklocksignore +func (d *cpuacctUsageData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.cpuacctCgroup) +} + +func (d *cpuacctUsageData) afterLoad() {} + +// +checklocksignore +func (d *cpuacctUsageData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.cpuacctCgroup) +} + +func (d *cpuacctUsageUserData) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.cpuacctUsageUserData" +} + +func (d *cpuacctUsageUserData) StateFields() []string { + return []string{ + "cpuacctCgroup", + } +} + +func (d *cpuacctUsageUserData) beforeSave() {} + +// +checklocksignore +func (d *cpuacctUsageUserData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.cpuacctCgroup) +} + +func (d *cpuacctUsageUserData) afterLoad() {} + +// +checklocksignore +func (d *cpuacctUsageUserData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.cpuacctCgroup) +} + +func (d *cpuacctUsageSysData) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.cpuacctUsageSysData" +} + +func (d *cpuacctUsageSysData) StateFields() []string { + return []string{ + "cpuacctCgroup", + } +} + +func (d *cpuacctUsageSysData) beforeSave() {} + +// +checklocksignore +func (d *cpuacctUsageSysData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.cpuacctCgroup) +} + +func (d *cpuacctUsageSysData) afterLoad() {} + +// +checklocksignore +func (d *cpuacctUsageSysData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.cpuacctCgroup) +} + +func (c *cpusetController) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.cpusetController" +} + +func (c *cpusetController) StateFields() []string { + return []string{ + "controllerCommon", + } +} + +func (c *cpusetController) beforeSave() {} + +// +checklocksignore +func (c *cpusetController) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.controllerCommon) +} + +func (c *cpusetController) afterLoad() {} + +// +checklocksignore +func (c *cpusetController) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.controllerCommon) +} + +func (r *dirRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.dirRefs" +} + +func (r *dirRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *dirRefs) beforeSave() {} + +// +checklocksignore +func (r *dirRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *dirRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (c *jobController) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.jobController" +} + +func (c *jobController) StateFields() []string { + return []string{ + "controllerCommon", + "id", + } +} + +func (c *jobController) beforeSave() {} + +// +checklocksignore +func (c *jobController) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.controllerCommon) + stateSinkObject.Save(1, &c.id) +} + +func (c *jobController) afterLoad() {} + +// +checklocksignore +func (c *jobController) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.controllerCommon) + stateSourceObject.Load(1, &c.id) +} + +func (d *jobIDData) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.jobIDData" +} + +func (d *jobIDData) StateFields() []string { + return []string{ + "c", + } +} + +func (d *jobIDData) beforeSave() {} + +// +checklocksignore +func (d *jobIDData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.c) +} + +func (d *jobIDData) afterLoad() {} + +// +checklocksignore +func (d *jobIDData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.c) +} + +func (c *memoryController) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.memoryController" +} + +func (c *memoryController) StateFields() []string { + return []string{ + "controllerCommon", + "limitBytes", + } +} + +func (c *memoryController) beforeSave() {} + +// +checklocksignore +func (c *memoryController) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.controllerCommon) + stateSinkObject.Save(1, &c.limitBytes) +} + +func (c *memoryController) afterLoad() {} + +// +checklocksignore +func (c *memoryController) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.controllerCommon) + stateSourceObject.Load(1, &c.limitBytes) +} + +func (d *memoryUsageInBytesData) StateTypeName() string { + return "pkg/sentry/fsimpl/cgroupfs.memoryUsageInBytesData" +} + +func (d *memoryUsageInBytesData) StateFields() []string { + return []string{} +} + +func (d *memoryUsageInBytesData) beforeSave() {} + +// +checklocksignore +func (d *memoryUsageInBytesData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() +} + +func (d *memoryUsageInBytesData) afterLoad() {} + +// +checklocksignore +func (d *memoryUsageInBytesData) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*controllerCommon)(nil)) + state.Register((*cgroupInode)(nil)) + state.Register((*cgroupProcsData)(nil)) + state.Register((*tasksData)(nil)) + state.Register((*FilesystemType)(nil)) + state.Register((*InternalData)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*implStatFS)(nil)) + state.Register((*dir)(nil)) + state.Register((*controllerFile)(nil)) + state.Register((*staticControllerFile)(nil)) + state.Register((*cpuController)(nil)) + state.Register((*cpuacctController)(nil)) + state.Register((*cpuacctCgroup)(nil)) + state.Register((*cpuacctStatData)(nil)) + state.Register((*cpuacctUsageData)(nil)) + state.Register((*cpuacctUsageUserData)(nil)) + state.Register((*cpuacctUsageSysData)(nil)) + state.Register((*cpusetController)(nil)) + state.Register((*dirRefs)(nil)) + state.Register((*jobController)(nil)) + state.Register((*jobIDData)(nil)) + state.Register((*memoryController)(nil)) + state.Register((*memoryUsageInBytesData)(nil)) +} diff --git a/pkg/sentry/fsimpl/cgroupfs/dir_refs.go b/pkg/sentry/fsimpl/cgroupfs/dir_refs.go new file mode 100644 index 000000000..c29f0c9ae --- /dev/null +++ b/pkg/sentry/fsimpl/cgroupfs/dir_refs.go @@ -0,0 +1,140 @@ +package cgroupfs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const direnableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var dirobj *dir + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type dirRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *dirRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *dirRefs) RefType() string { + return fmt.Sprintf("%T", dirobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *dirRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *dirRefs) LogRefs() bool { + return direnableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *dirRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *dirRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if direnableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *dirRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if direnableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *dirRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if direnableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *dirRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD deleted file mode 100644 index f981ff296..000000000 --- a/pkg/sentry/fsimpl/devpts/BUILD +++ /dev/null @@ -1,65 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "root_inode_refs", - out = "root_inode_refs.go", - package = "devpts", - prefix = "rootInode", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "rootInode", - }, -) - -go_library( - name = "devpts", - srcs = [ - "devpts.go", - "line_discipline.go", - "master.go", - "queue.go", - "replica.go", - "root_inode_refs.go", - "terminal.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/log", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/fs", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/unimpl", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "devpts_test", - size = "small", - srcs = ["devpts_test.go"], - library = ":devpts", - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/contexttest", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go b/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go new file mode 100644 index 000000000..78e685c7e --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go @@ -0,0 +1,502 @@ +// automatically generated by stateify. + +package devpts + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (fstype *FilesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.FilesystemType" +} + +func (fstype *FilesystemType) StateFields() []string { + return []string{ + "initErr", + "fs", + "root", + } +} + +func (fstype *FilesystemType) beforeSave() {} + +// +checklocksignore +func (fstype *FilesystemType) StateSave(stateSinkObject state.Sink) { + fstype.beforeSave() + stateSinkObject.Save(0, &fstype.initErr) + stateSinkObject.Save(1, &fstype.fs) + stateSinkObject.Save(2, &fstype.root) +} + +func (fstype *FilesystemType) afterLoad() {} + +// +checklocksignore +func (fstype *FilesystemType) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fstype.initErr) + stateSourceObject.Load(1, &fstype.fs) + stateSourceObject.Load(2, &fstype.root) +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "Filesystem", + "devMinor", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.Filesystem) + stateSinkObject.Save(1, &fs.devMinor) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.Filesystem) + stateSourceObject.Load(1, &fs.devMinor) +} + +func (i *rootInode) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.rootInode" +} + +func (i *rootInode) StateFields() []string { + return []string{ + "implStatFS", + "InodeAlwaysValid", + "InodeAttrs", + "InodeDirectoryNoNewChildren", + "InodeNotSymlink", + "InodeTemporary", + "OrderedChildren", + "rootInodeRefs", + "locks", + "master", + "replicas", + "nextIdx", + } +} + +func (i *rootInode) beforeSave() {} + +// +checklocksignore +func (i *rootInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.implStatFS) + stateSinkObject.Save(1, &i.InodeAlwaysValid) + stateSinkObject.Save(2, &i.InodeAttrs) + stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren) + stateSinkObject.Save(4, &i.InodeNotSymlink) + stateSinkObject.Save(5, &i.InodeTemporary) + stateSinkObject.Save(6, &i.OrderedChildren) + stateSinkObject.Save(7, &i.rootInodeRefs) + stateSinkObject.Save(8, &i.locks) + stateSinkObject.Save(9, &i.master) + stateSinkObject.Save(10, &i.replicas) + stateSinkObject.Save(11, &i.nextIdx) +} + +func (i *rootInode) afterLoad() {} + +// +checklocksignore +func (i *rootInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.implStatFS) + stateSourceObject.Load(1, &i.InodeAlwaysValid) + stateSourceObject.Load(2, &i.InodeAttrs) + stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren) + stateSourceObject.Load(4, &i.InodeNotSymlink) + stateSourceObject.Load(5, &i.InodeTemporary) + stateSourceObject.Load(6, &i.OrderedChildren) + stateSourceObject.Load(7, &i.rootInodeRefs) + stateSourceObject.Load(8, &i.locks) + stateSourceObject.Load(9, &i.master) + stateSourceObject.Load(10, &i.replicas) + stateSourceObject.Load(11, &i.nextIdx) +} + +func (i *implStatFS) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.implStatFS" +} + +func (i *implStatFS) StateFields() []string { + return []string{} +} + +func (i *implStatFS) beforeSave() {} + +// +checklocksignore +func (i *implStatFS) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *implStatFS) afterLoad() {} + +// +checklocksignore +func (i *implStatFS) StateLoad(stateSourceObject state.Source) { +} + +func (l *lineDiscipline) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.lineDiscipline" +} + +func (l *lineDiscipline) StateFields() []string { + return []string{ + "size", + "inQueue", + "outQueue", + "termios", + "column", + "masterWaiter", + "replicaWaiter", + } +} + +func (l *lineDiscipline) beforeSave() {} + +// +checklocksignore +func (l *lineDiscipline) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.size) + stateSinkObject.Save(1, &l.inQueue) + stateSinkObject.Save(2, &l.outQueue) + stateSinkObject.Save(3, &l.termios) + stateSinkObject.Save(4, &l.column) + stateSinkObject.Save(5, &l.masterWaiter) + stateSinkObject.Save(6, &l.replicaWaiter) +} + +func (l *lineDiscipline) afterLoad() {} + +// +checklocksignore +func (l *lineDiscipline) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.size) + stateSourceObject.Load(1, &l.inQueue) + stateSourceObject.Load(2, &l.outQueue) + stateSourceObject.Load(3, &l.termios) + stateSourceObject.Load(4, &l.column) + stateSourceObject.Load(5, &l.masterWaiter) + stateSourceObject.Load(6, &l.replicaWaiter) +} + +func (o *outputQueueTransformer) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.outputQueueTransformer" +} + +func (o *outputQueueTransformer) StateFields() []string { + return []string{} +} + +func (o *outputQueueTransformer) beforeSave() {} + +// +checklocksignore +func (o *outputQueueTransformer) StateSave(stateSinkObject state.Sink) { + o.beforeSave() +} + +func (o *outputQueueTransformer) afterLoad() {} + +// +checklocksignore +func (o *outputQueueTransformer) StateLoad(stateSourceObject state.Source) { +} + +func (i *inputQueueTransformer) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.inputQueueTransformer" +} + +func (i *inputQueueTransformer) StateFields() []string { + return []string{} +} + +func (i *inputQueueTransformer) beforeSave() {} + +// +checklocksignore +func (i *inputQueueTransformer) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *inputQueueTransformer) afterLoad() {} + +// +checklocksignore +func (i *inputQueueTransformer) StateLoad(stateSourceObject state.Source) { +} + +func (mi *masterInode) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.masterInode" +} + +func (mi *masterInode) StateFields() []string { + return []string{ + "implStatFS", + "InodeAttrs", + "InodeNoopRefCount", + "InodeNotDirectory", + "InodeNotSymlink", + "locks", + "root", + } +} + +func (mi *masterInode) beforeSave() {} + +// +checklocksignore +func (mi *masterInode) StateSave(stateSinkObject state.Sink) { + mi.beforeSave() + stateSinkObject.Save(0, &mi.implStatFS) + stateSinkObject.Save(1, &mi.InodeAttrs) + stateSinkObject.Save(2, &mi.InodeNoopRefCount) + stateSinkObject.Save(3, &mi.InodeNotDirectory) + stateSinkObject.Save(4, &mi.InodeNotSymlink) + stateSinkObject.Save(5, &mi.locks) + stateSinkObject.Save(6, &mi.root) +} + +func (mi *masterInode) afterLoad() {} + +// +checklocksignore +func (mi *masterInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mi.implStatFS) + stateSourceObject.Load(1, &mi.InodeAttrs) + stateSourceObject.Load(2, &mi.InodeNoopRefCount) + stateSourceObject.Load(3, &mi.InodeNotDirectory) + stateSourceObject.Load(4, &mi.InodeNotSymlink) + stateSourceObject.Load(5, &mi.locks) + stateSourceObject.Load(6, &mi.root) +} + +func (mfd *masterFileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.masterFileDescription" +} + +func (mfd *masterFileDescription) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "LockFD", + "inode", + "t", + } +} + +func (mfd *masterFileDescription) beforeSave() {} + +// +checklocksignore +func (mfd *masterFileDescription) StateSave(stateSinkObject state.Sink) { + mfd.beforeSave() + stateSinkObject.Save(0, &mfd.vfsfd) + stateSinkObject.Save(1, &mfd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &mfd.LockFD) + stateSinkObject.Save(3, &mfd.inode) + stateSinkObject.Save(4, &mfd.t) +} + +func (mfd *masterFileDescription) afterLoad() {} + +// +checklocksignore +func (mfd *masterFileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mfd.vfsfd) + stateSourceObject.Load(1, &mfd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &mfd.LockFD) + stateSourceObject.Load(3, &mfd.inode) + stateSourceObject.Load(4, &mfd.t) +} + +func (q *queue) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.queue" +} + +func (q *queue) StateFields() []string { + return []string{ + "readBuf", + "waitBuf", + "waitBufLen", + "readable", + "transformer", + } +} + +func (q *queue) beforeSave() {} + +// +checklocksignore +func (q *queue) StateSave(stateSinkObject state.Sink) { + q.beforeSave() + stateSinkObject.Save(0, &q.readBuf) + stateSinkObject.Save(1, &q.waitBuf) + stateSinkObject.Save(2, &q.waitBufLen) + stateSinkObject.Save(3, &q.readable) + stateSinkObject.Save(4, &q.transformer) +} + +func (q *queue) afterLoad() {} + +// +checklocksignore +func (q *queue) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &q.readBuf) + stateSourceObject.Load(1, &q.waitBuf) + stateSourceObject.Load(2, &q.waitBufLen) + stateSourceObject.Load(3, &q.readable) + stateSourceObject.Load(4, &q.transformer) +} + +func (ri *replicaInode) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.replicaInode" +} + +func (ri *replicaInode) StateFields() []string { + return []string{ + "implStatFS", + "InodeAttrs", + "InodeNoopRefCount", + "InodeNotDirectory", + "InodeNotSymlink", + "locks", + "root", + "t", + } +} + +func (ri *replicaInode) beforeSave() {} + +// +checklocksignore +func (ri *replicaInode) StateSave(stateSinkObject state.Sink) { + ri.beforeSave() + stateSinkObject.Save(0, &ri.implStatFS) + stateSinkObject.Save(1, &ri.InodeAttrs) + stateSinkObject.Save(2, &ri.InodeNoopRefCount) + stateSinkObject.Save(3, &ri.InodeNotDirectory) + stateSinkObject.Save(4, &ri.InodeNotSymlink) + stateSinkObject.Save(5, &ri.locks) + stateSinkObject.Save(6, &ri.root) + stateSinkObject.Save(7, &ri.t) +} + +func (ri *replicaInode) afterLoad() {} + +// +checklocksignore +func (ri *replicaInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ri.implStatFS) + stateSourceObject.Load(1, &ri.InodeAttrs) + stateSourceObject.Load(2, &ri.InodeNoopRefCount) + stateSourceObject.Load(3, &ri.InodeNotDirectory) + stateSourceObject.Load(4, &ri.InodeNotSymlink) + stateSourceObject.Load(5, &ri.locks) + stateSourceObject.Load(6, &ri.root) + stateSourceObject.Load(7, &ri.t) +} + +func (rfd *replicaFileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.replicaFileDescription" +} + +func (rfd *replicaFileDescription) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "LockFD", + "inode", + } +} + +func (rfd *replicaFileDescription) beforeSave() {} + +// +checklocksignore +func (rfd *replicaFileDescription) StateSave(stateSinkObject state.Sink) { + rfd.beforeSave() + stateSinkObject.Save(0, &rfd.vfsfd) + stateSinkObject.Save(1, &rfd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &rfd.LockFD) + stateSinkObject.Save(3, &rfd.inode) +} + +func (rfd *replicaFileDescription) afterLoad() {} + +// +checklocksignore +func (rfd *replicaFileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &rfd.vfsfd) + stateSourceObject.Load(1, &rfd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &rfd.LockFD) + stateSourceObject.Load(3, &rfd.inode) +} + +func (r *rootInodeRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.rootInodeRefs" +} + +func (r *rootInodeRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *rootInodeRefs) beforeSave() {} + +// +checklocksignore +func (r *rootInodeRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *rootInodeRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (tm *Terminal) StateTypeName() string { + return "pkg/sentry/fsimpl/devpts.Terminal" +} + +func (tm *Terminal) StateFields() []string { + return []string{ + "n", + "ld", + "masterKTTY", + "replicaKTTY", + } +} + +func (tm *Terminal) beforeSave() {} + +// +checklocksignore +func (tm *Terminal) StateSave(stateSinkObject state.Sink) { + tm.beforeSave() + stateSinkObject.Save(0, &tm.n) + stateSinkObject.Save(1, &tm.ld) + stateSinkObject.Save(2, &tm.masterKTTY) + stateSinkObject.Save(3, &tm.replicaKTTY) +} + +func (tm *Terminal) afterLoad() {} + +// +checklocksignore +func (tm *Terminal) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tm.n) + stateSourceObject.Load(1, &tm.ld) + stateSourceObject.Load(2, &tm.masterKTTY) + stateSourceObject.Load(3, &tm.replicaKTTY) +} + +func init() { + state.Register((*FilesystemType)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*rootInode)(nil)) + state.Register((*implStatFS)(nil)) + state.Register((*lineDiscipline)(nil)) + state.Register((*outputQueueTransformer)(nil)) + state.Register((*inputQueueTransformer)(nil)) + state.Register((*masterInode)(nil)) + state.Register((*masterFileDescription)(nil)) + state.Register((*queue)(nil)) + state.Register((*replicaInode)(nil)) + state.Register((*replicaFileDescription)(nil)) + state.Register((*rootInodeRefs)(nil)) + state.Register((*Terminal)(nil)) +} diff --git a/pkg/sentry/fsimpl/devpts/devpts_test.go b/pkg/sentry/fsimpl/devpts/devpts_test.go deleted file mode 100644 index 1ef07d702..000000000 --- a/pkg/sentry/fsimpl/devpts/devpts_test.go +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package devpts - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestSimpleMasterToReplica(t *testing.T) { - ld := newLineDiscipline(linux.DefaultReplicaTermios) - ctx := contexttest.Context(t) - inBytes := []byte("hello, tty\n") - src := usermem.BytesIOSequence(inBytes) - outBytes := make([]byte, 32) - dst := usermem.BytesIOSequence(outBytes) - - // Write to the input queue. - nw, err := ld.inputQueueWrite(ctx, src) - if err != nil { - t.Fatalf("error writing to input queue: %v", err) - } - if nw != int64(len(inBytes)) { - t.Fatalf("wrote wrong length: got %d, want %d", nw, len(inBytes)) - } - - // Read from the input queue. - nr, err := ld.inputQueueRead(ctx, dst) - if err != nil { - t.Fatalf("error reading from input queue: %v", err) - } - if nr != int64(len(inBytes)) { - t.Fatalf("read wrong length: got %d, want %d", nr, len(inBytes)) - } - - outStr := string(outBytes[:nr]) - inStr := string(inBytes) - if outStr != inStr { - t.Fatalf("written and read strings do not match: got %q, want %q", outStr, inStr) - } -} - -type callback func(*waiter.Entry, waiter.EventMask) - -func (cb callback) Callback(entry *waiter.Entry, mask waiter.EventMask) { - cb(entry, mask) -} - -func TestEchoDeadlock(t *testing.T) { - ctx := contexttest.Context(t) - termios := linux.DefaultReplicaTermios - termios.LocalFlags |= linux.ECHO - ld := newLineDiscipline(termios) - outBytes := make([]byte, 32) - dst := usermem.BytesIOSequence(outBytes) - entry := &waiter.Entry{Callback: callback(func(*waiter.Entry, waiter.EventMask) { - ld.inputQueueRead(ctx, dst) - })} - ld.masterWaiter.EventRegister(entry, waiter.ReadableEvents) - defer ld.masterWaiter.EventUnregister(entry) - inBytes := []byte("hello, tty\n") - n, err := ld.inputQueueWrite(ctx, usermem.BytesIOSequence(inBytes)) - if err != nil { - t.Fatalf("inputQueueWrite: %v", err) - } - if int(n) != len(inBytes) { - t.Fatalf("read wrong length: got %d, want %d", n, len(inBytes)) - } - outStr := string(outBytes[:n]) - inStr := string(inBytes) - if outStr != inStr { - t.Fatalf("written and read strings do not match: got %q, want %q", outStr, inStr) - } -} diff --git a/pkg/sentry/fsimpl/devpts/root_inode_refs.go b/pkg/sentry/fsimpl/devpts/root_inode_refs.go new file mode 100644 index 000000000..e53739a90 --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/root_inode_refs.go @@ -0,0 +1,140 @@ +package devpts + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const rootInodeenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var rootInodeobj *rootInode + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type rootInodeRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *rootInodeRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *rootInodeRefs) RefType() string { + return fmt.Sprintf("%T", rootInodeobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *rootInodeRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *rootInodeRefs) LogRefs() bool { + return rootInodeenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *rootInodeRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *rootInodeRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if rootInodeenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *rootInodeRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if rootInodeenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *rootInodeRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if rootInodeenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *rootInodeRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/devtmpfs/BUILD b/pkg/sentry/fsimpl/devtmpfs/BUILD deleted file mode 100644 index e49a04c1b..000000000 --- a/pkg/sentry/fsimpl/devtmpfs/BUILD +++ /dev/null @@ -1,37 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -licenses(["notice"]) - -go_library( - name = "devtmpfs", - srcs = [ - "devtmpfs.go", - "save_restore.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/fspath", - "//pkg/sentry/fsimpl/tmpfs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/sync", - ], -) - -go_test( - name = "devtmpfs_test", - size = "small", - srcs = ["devtmpfs_test.go"], - library = ":devtmpfs", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/fspath", - "//pkg/sentry/contexttest", - "//pkg/sentry/fsimpl/tmpfs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - ], -) diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_state_autogen.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_state_autogen.go new file mode 100644 index 000000000..900c7d8fe --- /dev/null +++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_state_autogen.go @@ -0,0 +1,41 @@ +// automatically generated by stateify. + +package devtmpfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (fst *FilesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/devtmpfs.FilesystemType" +} + +func (fst *FilesystemType) StateFields() []string { + return []string{ + "initErr", + "fs", + "root", + } +} + +func (fst *FilesystemType) beforeSave() {} + +// +checklocksignore +func (fst *FilesystemType) StateSave(stateSinkObject state.Sink) { + fst.beforeSave() + stateSinkObject.Save(0, &fst.initErr) + stateSinkObject.Save(1, &fst.fs) + stateSinkObject.Save(2, &fst.root) +} + +// +checklocksignore +func (fst *FilesystemType) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fst.initErr) + stateSourceObject.Load(1, &fst.fs) + stateSourceObject.Load(2, &fst.root) + stateSourceObject.AfterLoad(fst.afterLoad) +} + +func init() { + state.Register((*FilesystemType)(nil)) +} diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go deleted file mode 100644 index e058eda7a..000000000 --- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package devtmpfs - -import ( - "path" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -const devPath = "/dev" - -func setupDevtmpfs(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesystem, vfs.VirtualDentry, func()) { - t.Helper() - - ctx := contexttest.Context(t) - creds := auth.CredentialsFromContext(ctx) - vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - t.Fatalf("VFS init: %v", err) - } - // Register tmpfs just so that we can have a root filesystem that isn't - // devtmpfs. - vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - vfsObj.MustRegisterFilesystemType("devtmpfs", &FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - - // Create a test mount namespace with devtmpfs mounted at "/dev". - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "tmpfs" /* source */, "tmpfs" /* fsTypeName */, &vfs.MountOptions{}) - if err != nil { - t.Fatalf("failed to create tmpfs root mount: %v", err) - } - root := mntns.Root() - root.IncRef() - devpop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(devPath), - } - if err := vfsObj.MkdirAt(ctx, creds, &devpop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - t.Fatalf("failed to create mount point: %v", err) - } - if _, err := vfsObj.MountAt(ctx, creds, "devtmpfs" /* source */, &devpop, "devtmpfs" /* fsTypeName */, &vfs.MountOptions{}); err != nil { - t.Fatalf("failed to mount devtmpfs: %v", err) - } - - return ctx, creds, vfsObj, root, func() { - root.DecRef(ctx) - mntns.DecRef(ctx) - } -} - -func TestUserspaceInit(t *testing.T) { - ctx, creds, vfsObj, root, cleanup := setupDevtmpfs(t) - defer cleanup() - - a, err := NewAccessor(ctx, vfsObj, creds, "devtmpfs") - if err != nil { - t.Fatalf("failed to create devtmpfs.Accessor: %v", err) - } - defer a.Release(ctx) - - // Create "userspace-initialized" files using a devtmpfs.Accessor. - if err := a.UserspaceInit(ctx); err != nil { - t.Fatalf("failed to userspace-initialize devtmpfs: %v", err) - } - - // Created files should be visible in the test mount namespace. - links := []struct { - source string - target string - }{ - { - source: "fd", - target: "/proc/self/fd", - }, - { - source: "stdin", - target: "/proc/self/fd/0", - }, - { - source: "stdout", - target: "/proc/self/fd/1", - }, - { - source: "stderr", - target: "/proc/self/fd/2", - }, - { - source: "ptmx", - target: "pts/ptmx", - }, - } - - for _, link := range links { - abspath := path.Join(devPath, link.source) - if gotTarget, err := vfsObj.ReadlinkAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(abspath), - }); err != nil || gotTarget != link.target { - t.Errorf("readlink(%q): got (%q, %v), wanted (%q, nil)", abspath, gotTarget, err, link.target) - } - } - - dirs := []string{"shm", "pts"} - for _, dir := range dirs { - abspath := path.Join(devPath, dir) - statx, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(abspath), - }, &vfs.StatOptions{ - Mask: linux.STATX_MODE, - }) - if err != nil { - t.Errorf("stat(%q): got error %v ", abspath, err) - continue - } - if want := uint16(0755) | linux.S_IFDIR; statx.Mode != want { - t.Errorf("stat(%q): got mode %x, want %x", abspath, statx.Mode, want) - } - } -} - -func TestCreateDeviceFile(t *testing.T) { - ctx, creds, vfsObj, root, cleanup := setupDevtmpfs(t) - defer cleanup() - - a, err := NewAccessor(ctx, vfsObj, creds, "devtmpfs") - if err != nil { - t.Fatalf("failed to create devtmpfs.Accessor: %v", err) - } - defer a.Release(ctx) - - devFiles := []struct { - path string - kind vfs.DeviceKind - major uint32 - minor uint32 - perms uint16 - }{ - { - path: "dummy", - kind: vfs.CharDevice, - major: 12, - minor: 34, - perms: 0600, - }, - { - path: "foo/bar", - kind: vfs.BlockDevice, - major: 13, - minor: 35, - perms: 0660, - }, - { - path: "foo/baz", - kind: vfs.CharDevice, - major: 12, - minor: 40, - perms: 0666, - }, - { - path: "a/b/c/d/e", - kind: vfs.BlockDevice, - major: 12, - minor: 34, - perms: 0600, - }, - } - - for _, f := range devFiles { - if err := a.CreateDeviceFile(ctx, f.path, f.kind, f.major, f.minor, f.perms); err != nil { - t.Fatalf("failed to create device file: %v", err) - } - // The device special file should be visible in the test mount namespace. - abspath := path.Join(devPath, f.path) - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(abspath), - }, &vfs.StatOptions{ - Mask: linux.STATX_TYPE | linux.STATX_MODE, - }) - if err != nil { - t.Fatalf("failed to stat device file at %q: %v", abspath, err) - } - if stat.RdevMajor != f.major { - t.Errorf("major device number: got %v, wanted %v", stat.RdevMajor, f.major) - } - if stat.RdevMinor != f.minor { - t.Errorf("minor device number: got %v, wanted %v", stat.RdevMinor, f.minor) - } - wantMode := f.perms - switch f.kind { - case vfs.CharDevice: - wantMode |= linux.S_IFCHR - case vfs.BlockDevice: - wantMode |= linux.S_IFBLK - } - if stat.Mode != wantMode { - t.Errorf("device file mode: got %v, wanted %v", stat.Mode, wantMode) - } - } -} diff --git a/pkg/sentry/fsimpl/eventfd/BUILD b/pkg/sentry/fsimpl/eventfd/BUILD deleted file mode 100644 index c09fdc7f9..000000000 --- a/pkg/sentry/fsimpl/eventfd/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -licenses(["notice"]) - -go_library( - name = "eventfd", - srcs = ["eventfd.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/fdnotifier", - "//pkg/hostarch", - "//pkg/log", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "eventfd_test", - size = "small", - srcs = ["eventfd_test.go"], - library = ":eventfd", - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/contexttest", - "//pkg/sentry/vfs", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fsimpl/eventfd/eventfd_state_autogen.go b/pkg/sentry/fsimpl/eventfd/eventfd_state_autogen.go new file mode 100644 index 000000000..93de7f32e --- /dev/null +++ b/pkg/sentry/fsimpl/eventfd/eventfd_state_autogen.go @@ -0,0 +1,57 @@ +// automatically generated by stateify. + +package eventfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (efd *EventFileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/eventfd.EventFileDescription" +} + +func (efd *EventFileDescription) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + "queue", + "val", + "semMode", + "hostfd", + } +} + +func (efd *EventFileDescription) beforeSave() {} + +// +checklocksignore +func (efd *EventFileDescription) StateSave(stateSinkObject state.Sink) { + efd.beforeSave() + stateSinkObject.Save(0, &efd.vfsfd) + stateSinkObject.Save(1, &efd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &efd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &efd.NoLockFD) + stateSinkObject.Save(4, &efd.queue) + stateSinkObject.Save(5, &efd.val) + stateSinkObject.Save(6, &efd.semMode) + stateSinkObject.Save(7, &efd.hostfd) +} + +func (efd *EventFileDescription) afterLoad() {} + +// +checklocksignore +func (efd *EventFileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &efd.vfsfd) + stateSourceObject.Load(1, &efd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &efd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &efd.NoLockFD) + stateSourceObject.Load(4, &efd.queue) + stateSourceObject.Load(5, &efd.val) + stateSourceObject.Load(6, &efd.semMode) + stateSourceObject.Load(7, &efd.hostfd) +} + +func init() { + state.Register((*EventFileDescription)(nil)) +} diff --git a/pkg/sentry/fsimpl/eventfd/eventfd_test.go b/pkg/sentry/fsimpl/eventfd/eventfd_test.go deleted file mode 100644 index 85718f813..000000000 --- a/pkg/sentry/fsimpl/eventfd/eventfd_test.go +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package eventfd - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestEventFD(t *testing.T) { - initVals := []uint64{ - 0, - // Using a non-zero initial value verifies that writing to an - // eventfd signals when the eventfd's counter was already - // non-zero. - 343, - } - - for _, initVal := range initVals { - ctx := contexttest.Context(t) - vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - t.Fatalf("VFS init: %v", err) - } - - // Make a new eventfd that is writable. - eventfd, err := New(ctx, vfsObj, initVal, false, linux.O_RDWR) - if err != nil { - t.Fatalf("New() failed: %v", err) - } - defer eventfd.DecRef(ctx) - - // Register a callback for a write event. - w, ch := waiter.NewChannelEntry(nil) - eventfd.EventRegister(&w, waiter.ReadableEvents) - defer eventfd.EventUnregister(&w) - - data := []byte("00000124") - // Create and submit a write request. - n, err := eventfd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) - if err != nil { - t.Fatal(err) - } - if n != 8 { - t.Errorf("eventfd.write wrote %d bytes, not full int64", n) - } - - // Check if the callback fired due to the write event. - select { - case <-ch: - default: - t.Errorf("Didn't get notified of EventIn after write") - } - } -} - -func TestEventFDStat(t *testing.T) { - ctx := contexttest.Context(t) - vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - t.Fatalf("VFS init: %v", err) - } - - // Make a new eventfd that is writable. - eventfd, err := New(ctx, vfsObj, 0, false, linux.O_RDWR) - if err != nil { - t.Fatalf("New() failed: %v", err) - } - defer eventfd.DecRef(ctx) - - statx, err := eventfd.Stat(ctx, vfs.StatOptions{ - Mask: linux.STATX_BASIC_STATS, - }) - if err != nil { - t.Fatalf("eventfd.Stat failed: %v", err) - } - if statx.Size != 0 { - t.Errorf("eventfd size should be 0") - } -} diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD deleted file mode 100644 index 871df5984..000000000 --- a/pkg/sentry/fsimpl/fuse/BUILD +++ /dev/null @@ -1,92 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "request_list", - out = "request_list.go", - package = "fuse", - prefix = "request", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Request", - "Linker": "*Request", - }, -) - -go_template_instance( - name = "inode_refs", - out = "inode_refs.go", - package = "fuse", - prefix = "inode", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "inode", - }, -) - -go_library( - name = "fuse", - srcs = [ - "connection.go", - "connection_control.go", - "dev.go", - "directory.go", - "file.go", - "fusefs.go", - "inode_refs.go", - "read_write.go", - "register.go", - "regular_file.go", - "request_list.go", - "request_response.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/sentry/fsimpl/devtmpfs", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "fuse_test", - size = "small", - srcs = [ - "connection_test.go", - "dev_test.go", - "utils_test.go", - ], - library = ":fuse", - deps = [ - "//pkg/abi/linux", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/marshal", - "//pkg/sentry/fsimpl/testutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/fsimpl/fuse/connection_test.go b/pkg/sentry/fsimpl/fuse/connection_test.go deleted file mode 100644 index 1fddd858e..000000000 --- a/pkg/sentry/fsimpl/fuse/connection_test.go +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fuse - -import ( - "math/rand" - "testing" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" -) - -// TestConnectionInitBlock tests if initialization -// correctly blocks and unblocks the connection. -// Since it's unfeasible to test kernelTask.Block() in unit test, -// the code in Call() are not tested here. -func TestConnectionInitBlock(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - - conn, _, err := newTestConnection(s, k, maxActiveRequestsDefault) - if err != nil { - t.Fatalf("newTestConnection: %v", err) - } - - select { - case <-conn.initializedChan: - t.Fatalf("initializedChan should be blocking before SetInitialized") - default: - } - - conn.SetInitialized() - - select { - case <-conn.initializedChan: - default: - t.Fatalf("initializedChan should not be blocking after SetInitialized") - } -} - -func TestConnectionAbort(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - creds := auth.CredentialsFromContext(s.Ctx) - task := kernel.TaskFromContext(s.Ctx) - - const numRequests uint64 = 256 - - conn, _, err := newTestConnection(s, k, numRequests) - if err != nil { - t.Fatalf("newTestConnection: %v", err) - } - - testObj := &testPayload{ - data: rand.Uint32(), - } - - var futNormal []*futureResponse - - for i := 0; i < int(numRequests); i++ { - req := conn.NewRequest(creds, uint32(i), uint64(i), 0, testObj) - fut, err := conn.callFutureLocked(task, req) - if err != nil { - t.Fatalf("callFutureLocked failed: %v", err) - } - futNormal = append(futNormal, fut) - } - - conn.Abort(s.Ctx) - - // Abort should unblock the initialization channel. - // Note: no test requests are actually blocked on `conn.initializedChan`. - select { - case <-conn.initializedChan: - default: - t.Fatalf("initializedChan should not be blocking after SetInitialized") - } - - // Abort will return ECONNABORTED error to unblocked requests. - for _, fut := range futNormal { - if fut.getResponse().hdr.Error != -int32(unix.ECONNABORTED) { - t.Fatalf("Incorrect error code received for aborted connection: %v", fut.getResponse().hdr.Error) - } - } - - // After abort, Call() should return directly with ENOTCONN. - req := conn.NewRequest(creds, 0, 0, 0, testObj) - _, err = conn.Call(task, req) - if !linuxerr.Equals(linuxerr.ENOTCONN, err) { - t.Fatalf("Incorrect error code received for Call() after connection aborted") - } - -} diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go deleted file mode 100644 index 04250d796..000000000 --- a/pkg/sentry/fsimpl/fuse/dev_test.go +++ /dev/null @@ -1,320 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fuse - -import ( - "fmt" - "math/rand" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -// echoTestOpcode is the Opcode used during testing. The server used in tests -// will simply echo the payload back with the appropriate headers. -const echoTestOpcode linux.FUSEOpcode = 1000 - -// TestFUSECommunication tests that the communication layer between the Sentry and the -// FUSE server daemon works as expected. -func TestFUSECommunication(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - creds := auth.CredentialsFromContext(s.Ctx) - - // Create test cases with different number of concurrent clients and servers. - testCases := []struct { - Name string - NumClients int - NumServers int - MaxActiveRequests uint64 - }{ - { - Name: "SingleClientSingleServer", - NumClients: 1, - NumServers: 1, - MaxActiveRequests: maxActiveRequestsDefault, - }, - { - Name: "SingleClientMultipleServers", - NumClients: 1, - NumServers: 10, - MaxActiveRequests: maxActiveRequestsDefault, - }, - { - Name: "MultipleClientsSingleServer", - NumClients: 10, - NumServers: 1, - MaxActiveRequests: maxActiveRequestsDefault, - }, - { - Name: "MultipleClientsMultipleServers", - NumClients: 10, - NumServers: 10, - MaxActiveRequests: maxActiveRequestsDefault, - }, - { - Name: "RequestCapacityFull", - NumClients: 10, - NumServers: 1, - MaxActiveRequests: 1, - }, - { - Name: "RequestCapacityContinuouslyFull", - NumClients: 100, - NumServers: 2, - MaxActiveRequests: 2, - }, - } - - for _, testCase := range testCases { - t.Run(testCase.Name, func(t *testing.T) { - conn, fd, err := newTestConnection(s, k, testCase.MaxActiveRequests) - if err != nil { - t.Fatalf("newTestConnection: %v", err) - } - - clientsDone := make([]chan struct{}, testCase.NumClients) - serversDone := make([]chan struct{}, testCase.NumServers) - serversKill := make([]chan struct{}, testCase.NumServers) - - // FUSE clients. - for i := 0; i < testCase.NumClients; i++ { - clientsDone[i] = make(chan struct{}) - go func(i int) { - fuseClientRun(t, s, k, conn, creds, uint32(i), uint64(i), clientsDone[i]) - }(i) - } - - // FUSE servers. - for j := 0; j < testCase.NumServers; j++ { - serversDone[j] = make(chan struct{}) - serversKill[j] = make(chan struct{}, 1) // The kill command shouldn't block. - go func(j int) { - fuseServerRun(t, s, k, fd, serversDone[j], serversKill[j]) - }(j) - } - - // Tear down. - // - // Make sure all the clients are done. - for i := 0; i < testCase.NumClients; i++ { - <-clientsDone[i] - } - - // Kill any server that is potentially waiting. - for j := 0; j < testCase.NumServers; j++ { - serversKill[j] <- struct{}{} - } - - // Make sure all the servers are done. - for j := 0; j < testCase.NumServers; j++ { - <-serversDone[j] - } - }) - } -} - -// CallTest makes a request to the server and blocks the invoking -// goroutine until a server responds with a response. Doesn't block -// a kernel.Task. Analogous to Connection.Call but used for testing. -func CallTest(conn *connection, t *kernel.Task, r *Request, i uint32) (*Response, error) { - conn.fd.mu.Lock() - - // Wait until we're certain that a new request can be processed. - for conn.fd.numActiveRequests == conn.fd.fs.opts.maxActiveRequests { - conn.fd.mu.Unlock() - select { - case <-conn.fd.fullQueueCh: - } - conn.fd.mu.Lock() - } - - fut, err := conn.callFutureLocked(t, r) // No task given. - conn.fd.mu.Unlock() - - if err != nil { - return nil, err - } - - // Resolve the response. - // - // Block without a task. - select { - case <-fut.ch: - } - - // A response is ready. Resolve and return it. - return fut.getResponse(), nil -} - -// ReadTest is analogous to vfs.FileDescription.Read and reads from the FUSE -// device. However, it does so by - not blocking the task that is calling - and -// instead just waits on a channel. The behaviour is essentially the same as -// DeviceFD.Read except it guarantees that the task is not blocked. -func ReadTest(serverTask *kernel.Task, fd *vfs.FileDescription, inIOseq usermem.IOSequence, killServer chan struct{}) (int64, bool, error) { - var err error - var n, total int64 - - dev := fd.Impl().(*DeviceFD) - - // Register for notifications. - w, ch := waiter.NewChannelEntry(nil) - dev.EventRegister(&w, waiter.ReadableEvents) - for { - // Issue the request and break out if it completes with anything other than - // "would block". - n, err = dev.Read(serverTask, inIOseq, vfs.ReadOptions{}) - total += n - if err != syserror.ErrWouldBlock { - break - } - - // Wait for a notification that we should retry. - // Emulate the blocking for when no requests are available - select { - case <-ch: - case <-killServer: - // Server killed by the main program. - return 0, true, nil - } - } - - dev.EventUnregister(&w) - return total, false, err -} - -// fuseClientRun emulates all the actions of a normal FUSE request. It creates -// a header, a payload, calls the server, waits for the response, and processes -// the response. -func fuseClientRun(t *testing.T, s *testutil.System, k *kernel.Kernel, conn *connection, creds *auth.Credentials, pid uint32, inode uint64, clientDone chan struct{}) { - defer func() { clientDone <- struct{}{} }() - - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - clientTask, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("fuse-client-%v", pid), tc, s.MntNs, s.Root, s.Root) - if err != nil { - t.Fatal(err) - } - testObj := &testPayload{ - data: rand.Uint32(), - } - - req := conn.NewRequest(creds, pid, inode, echoTestOpcode, testObj) - - // Queue up a request. - // Analogous to Call except it doesn't block on the task. - resp, err := CallTest(conn, clientTask, req, pid) - if err != nil { - t.Fatalf("CallTaskNonBlock failed: %v", err) - } - - if err = resp.Error(); err != nil { - t.Fatalf("Server responded with an error: %v", err) - } - - var respTestPayload testPayload - if err := resp.UnmarshalPayload(&respTestPayload); err != nil { - t.Fatalf("Unmarshalling payload error: %v", err) - } - - if resp.hdr.Unique != req.hdr.Unique { - t.Fatalf("got response for another request. Expected response for req %v but got response for req %v", - req.hdr.Unique, resp.hdr.Unique) - } - - if respTestPayload.data != testObj.data { - t.Fatalf("read incorrect data. Data expected: %v, but got %v", testObj.data, respTestPayload.data) - } - -} - -// fuseServerRun creates a task and emulates all the actions of a simple FUSE server -// that simply reads a request and echos the same struct back as a response using the -// appropriate headers. -func fuseServerRun(t *testing.T, s *testutil.System, k *kernel.Kernel, fd *vfs.FileDescription, serverDone, killServer chan struct{}) { - defer func() { serverDone <- struct{}{} }() - - // Create the tasks that the server will be using. - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - var readPayload testPayload - - serverTask, err := testutil.CreateTask(s.Ctx, "fuse-server", tc, s.MntNs, s.Root, s.Root) - if err != nil { - t.Fatal(err) - } - - // Read the request. - for { - inHdrLen := uint32((*linux.FUSEHeaderIn)(nil).SizeBytes()) - payloadLen := uint32(readPayload.SizeBytes()) - - // The raed buffer must meet some certain size criteria. - buffSize := inHdrLen + payloadLen - if buffSize < linux.FUSE_MIN_READ_BUFFER { - buffSize = linux.FUSE_MIN_READ_BUFFER - } - inBuf := make([]byte, buffSize) - inIOseq := usermem.BytesIOSequence(inBuf) - - n, serverKilled, err := ReadTest(serverTask, fd, inIOseq, killServer) - if err != nil { - t.Fatalf("Read failed :%v", err) - } - - // Server should shut down. No new requests are going to be made. - if serverKilled { - break - } - - if n <= 0 { - t.Fatalf("Read read no bytes") - } - - var readFUSEHeaderIn linux.FUSEHeaderIn - readFUSEHeaderIn.UnmarshalUnsafe(inBuf[:inHdrLen]) - readPayload.UnmarshalUnsafe(inBuf[inHdrLen : inHdrLen+payloadLen]) - - if readFUSEHeaderIn.Opcode != echoTestOpcode { - t.Fatalf("read incorrect data. Header: %v, Payload: %v", readFUSEHeaderIn, readPayload) - } - - // Write the response. - outHdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes()) - outBuf := make([]byte, outHdrLen+payloadLen) - outHeader := linux.FUSEHeaderOut{ - Len: outHdrLen + payloadLen, - Error: 0, - Unique: readFUSEHeaderIn.Unique, - } - - // Echo the payload back. - outHeader.MarshalUnsafe(outBuf[:outHdrLen]) - readPayload.MarshalUnsafe(outBuf[outHdrLen:]) - outIOseq := usermem.BytesIOSequence(outBuf) - - _, err = fd.Write(s.Ctx, outIOseq, vfs.WriteOptions{}) - if err != nil { - t.Fatalf("Write failed :%v", err) - } - } -} diff --git a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go new file mode 100644 index 000000000..f61e010dc --- /dev/null +++ b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go @@ -0,0 +1,560 @@ +// automatically generated by stateify. + +package fuse + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (conn *connection) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.connection" +} + +func (conn *connection) StateFields() []string { + return []string{ + "fd", + "attributeVersion", + "initialized", + "initializedChan", + "connected", + "connInitError", + "connInitSuccess", + "aborted", + "numWaiting", + "asyncNum", + "asyncCongestionThreshold", + "asyncNumMax", + "maxRead", + "maxWrite", + "maxPages", + "minor", + "atomicOTrunc", + "asyncRead", + "writebackCache", + "bigWrites", + "dontMask", + "noOpen", + } +} + +func (conn *connection) beforeSave() {} + +// +checklocksignore +func (conn *connection) StateSave(stateSinkObject state.Sink) { + conn.beforeSave() + var initializedChanValue bool = conn.saveInitializedChan() + stateSinkObject.SaveValue(3, initializedChanValue) + stateSinkObject.Save(0, &conn.fd) + stateSinkObject.Save(1, &conn.attributeVersion) + stateSinkObject.Save(2, &conn.initialized) + stateSinkObject.Save(4, &conn.connected) + stateSinkObject.Save(5, &conn.connInitError) + stateSinkObject.Save(6, &conn.connInitSuccess) + stateSinkObject.Save(7, &conn.aborted) + stateSinkObject.Save(8, &conn.numWaiting) + stateSinkObject.Save(9, &conn.asyncNum) + stateSinkObject.Save(10, &conn.asyncCongestionThreshold) + stateSinkObject.Save(11, &conn.asyncNumMax) + stateSinkObject.Save(12, &conn.maxRead) + stateSinkObject.Save(13, &conn.maxWrite) + stateSinkObject.Save(14, &conn.maxPages) + stateSinkObject.Save(15, &conn.minor) + stateSinkObject.Save(16, &conn.atomicOTrunc) + stateSinkObject.Save(17, &conn.asyncRead) + stateSinkObject.Save(18, &conn.writebackCache) + stateSinkObject.Save(19, &conn.bigWrites) + stateSinkObject.Save(20, &conn.dontMask) + stateSinkObject.Save(21, &conn.noOpen) +} + +func (conn *connection) afterLoad() {} + +// +checklocksignore +func (conn *connection) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &conn.fd) + stateSourceObject.Load(1, &conn.attributeVersion) + stateSourceObject.Load(2, &conn.initialized) + stateSourceObject.Load(4, &conn.connected) + stateSourceObject.Load(5, &conn.connInitError) + stateSourceObject.Load(6, &conn.connInitSuccess) + stateSourceObject.Load(7, &conn.aborted) + stateSourceObject.Load(8, &conn.numWaiting) + stateSourceObject.Load(9, &conn.asyncNum) + stateSourceObject.Load(10, &conn.asyncCongestionThreshold) + stateSourceObject.Load(11, &conn.asyncNumMax) + stateSourceObject.Load(12, &conn.maxRead) + stateSourceObject.Load(13, &conn.maxWrite) + stateSourceObject.Load(14, &conn.maxPages) + stateSourceObject.Load(15, &conn.minor) + stateSourceObject.Load(16, &conn.atomicOTrunc) + stateSourceObject.Load(17, &conn.asyncRead) + stateSourceObject.Load(18, &conn.writebackCache) + stateSourceObject.Load(19, &conn.bigWrites) + stateSourceObject.Load(20, &conn.dontMask) + stateSourceObject.Load(21, &conn.noOpen) + stateSourceObject.LoadValue(3, new(bool), func(y interface{}) { conn.loadInitializedChan(y.(bool)) }) +} + +func (f *fuseDevice) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.fuseDevice" +} + +func (f *fuseDevice) StateFields() []string { + return []string{} +} + +func (f *fuseDevice) beforeSave() {} + +// +checklocksignore +func (f *fuseDevice) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *fuseDevice) afterLoad() {} + +// +checklocksignore +func (f *fuseDevice) StateLoad(stateSourceObject state.Source) { +} + +func (fd *DeviceFD) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.DeviceFD" +} + +func (fd *DeviceFD) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + "nextOpID", + "queue", + "numActiveRequests", + "completions", + "writeCursor", + "writeBuf", + "writeCursorFR", + "waitQueue", + "fullQueueCh", + "fs", + } +} + +func (fd *DeviceFD) beforeSave() {} + +// +checklocksignore +func (fd *DeviceFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + var fullQueueChValue int = fd.saveFullQueueCh() + stateSinkObject.SaveValue(12, fullQueueChValue) + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &fd.NoLockFD) + stateSinkObject.Save(4, &fd.nextOpID) + stateSinkObject.Save(5, &fd.queue) + stateSinkObject.Save(6, &fd.numActiveRequests) + stateSinkObject.Save(7, &fd.completions) + stateSinkObject.Save(8, &fd.writeCursor) + stateSinkObject.Save(9, &fd.writeBuf) + stateSinkObject.Save(10, &fd.writeCursorFR) + stateSinkObject.Save(11, &fd.waitQueue) + stateSinkObject.Save(13, &fd.fs) +} + +func (fd *DeviceFD) afterLoad() {} + +// +checklocksignore +func (fd *DeviceFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &fd.NoLockFD) + stateSourceObject.Load(4, &fd.nextOpID) + stateSourceObject.Load(5, &fd.queue) + stateSourceObject.Load(6, &fd.numActiveRequests) + stateSourceObject.Load(7, &fd.completions) + stateSourceObject.Load(8, &fd.writeCursor) + stateSourceObject.Load(9, &fd.writeBuf) + stateSourceObject.Load(10, &fd.writeCursorFR) + stateSourceObject.Load(11, &fd.waitQueue) + stateSourceObject.Load(13, &fd.fs) + stateSourceObject.LoadValue(12, new(int), func(y interface{}) { fd.loadFullQueueCh(y.(int)) }) +} + +func (fsType *FilesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.FilesystemType" +} + +func (fsType *FilesystemType) StateFields() []string { + return []string{} +} + +func (fsType *FilesystemType) beforeSave() {} + +// +checklocksignore +func (fsType *FilesystemType) StateSave(stateSinkObject state.Sink) { + fsType.beforeSave() +} + +func (fsType *FilesystemType) afterLoad() {} + +// +checklocksignore +func (fsType *FilesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (f *filesystemOptions) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.filesystemOptions" +} + +func (f *filesystemOptions) StateFields() []string { + return []string{ + "mopts", + "uid", + "gid", + "rootMode", + "maxActiveRequests", + "maxRead", + "defaultPermissions", + "allowOther", + } +} + +func (f *filesystemOptions) beforeSave() {} + +// +checklocksignore +func (f *filesystemOptions) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.mopts) + stateSinkObject.Save(1, &f.uid) + stateSinkObject.Save(2, &f.gid) + stateSinkObject.Save(3, &f.rootMode) + stateSinkObject.Save(4, &f.maxActiveRequests) + stateSinkObject.Save(5, &f.maxRead) + stateSinkObject.Save(6, &f.defaultPermissions) + stateSinkObject.Save(7, &f.allowOther) +} + +func (f *filesystemOptions) afterLoad() {} + +// +checklocksignore +func (f *filesystemOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.mopts) + stateSourceObject.Load(1, &f.uid) + stateSourceObject.Load(2, &f.gid) + stateSourceObject.Load(3, &f.rootMode) + stateSourceObject.Load(4, &f.maxActiveRequests) + stateSourceObject.Load(5, &f.maxRead) + stateSourceObject.Load(6, &f.defaultPermissions) + stateSourceObject.Load(7, &f.allowOther) +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "Filesystem", + "devMinor", + "conn", + "opts", + "umounted", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.Filesystem) + stateSinkObject.Save(1, &fs.devMinor) + stateSinkObject.Save(2, &fs.conn) + stateSinkObject.Save(3, &fs.opts) + stateSinkObject.Save(4, &fs.umounted) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.Filesystem) + stateSourceObject.Load(1, &fs.devMinor) + stateSourceObject.Load(2, &fs.conn) + stateSourceObject.Load(3, &fs.opts) + stateSourceObject.Load(4, &fs.umounted) +} + +func (i *inode) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.inode" +} + +func (i *inode) StateFields() []string { + return []string{ + "inodeRefs", + "InodeAlwaysValid", + "InodeAttrs", + "InodeDirectoryNoNewChildren", + "InodeNotSymlink", + "OrderedChildren", + "fs", + "metadataMu", + "nodeID", + "locks", + "size", + "attributeVersion", + "attributeTime", + "version", + "link", + } +} + +func (i *inode) beforeSave() {} + +// +checklocksignore +func (i *inode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.inodeRefs) + stateSinkObject.Save(1, &i.InodeAlwaysValid) + stateSinkObject.Save(2, &i.InodeAttrs) + stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren) + stateSinkObject.Save(4, &i.InodeNotSymlink) + stateSinkObject.Save(5, &i.OrderedChildren) + stateSinkObject.Save(6, &i.fs) + stateSinkObject.Save(7, &i.metadataMu) + stateSinkObject.Save(8, &i.nodeID) + stateSinkObject.Save(9, &i.locks) + stateSinkObject.Save(10, &i.size) + stateSinkObject.Save(11, &i.attributeVersion) + stateSinkObject.Save(12, &i.attributeTime) + stateSinkObject.Save(13, &i.version) + stateSinkObject.Save(14, &i.link) +} + +func (i *inode) afterLoad() {} + +// +checklocksignore +func (i *inode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.inodeRefs) + stateSourceObject.Load(1, &i.InodeAlwaysValid) + stateSourceObject.Load(2, &i.InodeAttrs) + stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren) + stateSourceObject.Load(4, &i.InodeNotSymlink) + stateSourceObject.Load(5, &i.OrderedChildren) + stateSourceObject.Load(6, &i.fs) + stateSourceObject.Load(7, &i.metadataMu) + stateSourceObject.Load(8, &i.nodeID) + stateSourceObject.Load(9, &i.locks) + stateSourceObject.Load(10, &i.size) + stateSourceObject.Load(11, &i.attributeVersion) + stateSourceObject.Load(12, &i.attributeTime) + stateSourceObject.Load(13, &i.version) + stateSourceObject.Load(14, &i.link) +} + +func (r *inodeRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.inodeRefs" +} + +func (r *inodeRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *inodeRefs) beforeSave() {} + +// +checklocksignore +func (r *inodeRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *inodeRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (l *requestList) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.requestList" +} + +func (l *requestList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *requestList) beforeSave() {} + +// +checklocksignore +func (l *requestList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *requestList) afterLoad() {} + +// +checklocksignore +func (l *requestList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *requestEntry) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.requestEntry" +} + +func (e *requestEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *requestEntry) beforeSave() {} + +// +checklocksignore +func (e *requestEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *requestEntry) afterLoad() {} + +// +checklocksignore +func (e *requestEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (r *Request) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.Request" +} + +func (r *Request) StateFields() []string { + return []string{ + "requestEntry", + "id", + "hdr", + "data", + "payload", + "async", + "noReply", + } +} + +func (r *Request) beforeSave() {} + +// +checklocksignore +func (r *Request) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.requestEntry) + stateSinkObject.Save(1, &r.id) + stateSinkObject.Save(2, &r.hdr) + stateSinkObject.Save(3, &r.data) + stateSinkObject.Save(4, &r.payload) + stateSinkObject.Save(5, &r.async) + stateSinkObject.Save(6, &r.noReply) +} + +func (r *Request) afterLoad() {} + +// +checklocksignore +func (r *Request) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.requestEntry) + stateSourceObject.Load(1, &r.id) + stateSourceObject.Load(2, &r.hdr) + stateSourceObject.Load(3, &r.data) + stateSourceObject.Load(4, &r.payload) + stateSourceObject.Load(5, &r.async) + stateSourceObject.Load(6, &r.noReply) +} + +func (f *futureResponse) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.futureResponse" +} + +func (f *futureResponse) StateFields() []string { + return []string{ + "opcode", + "ch", + "hdr", + "data", + "async", + } +} + +func (f *futureResponse) beforeSave() {} + +// +checklocksignore +func (f *futureResponse) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.opcode) + stateSinkObject.Save(1, &f.ch) + stateSinkObject.Save(2, &f.hdr) + stateSinkObject.Save(3, &f.data) + stateSinkObject.Save(4, &f.async) +} + +func (f *futureResponse) afterLoad() {} + +// +checklocksignore +func (f *futureResponse) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.opcode) + stateSourceObject.Load(1, &f.ch) + stateSourceObject.Load(2, &f.hdr) + stateSourceObject.Load(3, &f.data) + stateSourceObject.Load(4, &f.async) +} + +func (r *Response) StateTypeName() string { + return "pkg/sentry/fsimpl/fuse.Response" +} + +func (r *Response) StateFields() []string { + return []string{ + "opcode", + "hdr", + "data", + } +} + +func (r *Response) beforeSave() {} + +// +checklocksignore +func (r *Response) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.opcode) + stateSinkObject.Save(1, &r.hdr) + stateSinkObject.Save(2, &r.data) +} + +func (r *Response) afterLoad() {} + +// +checklocksignore +func (r *Response) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.opcode) + stateSourceObject.Load(1, &r.hdr) + stateSourceObject.Load(2, &r.data) +} + +func init() { + state.Register((*connection)(nil)) + state.Register((*fuseDevice)(nil)) + state.Register((*DeviceFD)(nil)) + state.Register((*FilesystemType)(nil)) + state.Register((*filesystemOptions)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*inode)(nil)) + state.Register((*inodeRefs)(nil)) + state.Register((*requestList)(nil)) + state.Register((*requestEntry)(nil)) + state.Register((*Request)(nil)) + state.Register((*futureResponse)(nil)) + state.Register((*Response)(nil)) +} diff --git a/pkg/sentry/fsimpl/fuse/inode_refs.go b/pkg/sentry/fsimpl/fuse/inode_refs.go new file mode 100644 index 000000000..74489cf5e --- /dev/null +++ b/pkg/sentry/fsimpl/fuse/inode_refs.go @@ -0,0 +1,140 @@ +package fuse + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const inodeenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var inodeobj *inode + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type inodeRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *inodeRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *inodeRefs) RefType() string { + return fmt.Sprintf("%T", inodeobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *inodeRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *inodeRefs) LogRefs() bool { + return inodeenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *inodeRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *inodeRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if inodeenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *inodeRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if inodeenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *inodeRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if inodeenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *inodeRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/fuse/request_list.go b/pkg/sentry/fsimpl/fuse/request_list.go new file mode 100644 index 000000000..060ac4a3f --- /dev/null +++ b/pkg/sentry/fsimpl/fuse/request_list.go @@ -0,0 +1,221 @@ +package fuse + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type requestElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (requestElementMapper) linkerFor(elem *Request) *Request { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type requestList struct { + head *Request + tail *Request +} + +// Reset resets list l to the empty state. +func (l *requestList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *requestList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *requestList) Front() *Request { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *requestList) Back() *Request { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *requestList) Len() (count int) { + for e := l.Front(); e != nil; e = (requestElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *requestList) PushFront(e *Request) { + linker := requestElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + requestElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *requestList) PushBack(e *Request) { + linker := requestElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + requestElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *requestList) PushBackList(m *requestList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + requestElementMapper{}.linkerFor(l.tail).SetNext(m.head) + requestElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *requestList) InsertAfter(b, e *Request) { + bLinker := requestElementMapper{}.linkerFor(b) + eLinker := requestElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + requestElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *requestList) InsertBefore(a, e *Request) { + aLinker := requestElementMapper{}.linkerFor(a) + eLinker := requestElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + requestElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *requestList) Remove(e *Request) { + linker := requestElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + requestElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + requestElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type requestEntry struct { + next *Request + prev *Request +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *requestEntry) Next() *Request { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *requestEntry) Prev() *Request { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *requestEntry) SetNext(elem *Request) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *requestEntry) SetPrev(elem *Request) { + e.prev = elem +} diff --git a/pkg/sentry/fsimpl/fuse/utils_test.go b/pkg/sentry/fsimpl/fuse/utils_test.go deleted file mode 100644 index b0bab0066..000000000 --- a/pkg/sentry/fsimpl/fuse/utils_test.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fuse - -import ( - "io" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/marshal" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - - "gvisor.dev/gvisor/pkg/hostarch" -) - -func setup(t *testing.T) *testutil.System { - k, err := testutil.Boot() - if err != nil { - t.Fatalf("Error creating kernel: %v", err) - } - - ctx := k.SupervisorContext() - creds := auth.CredentialsFromContext(ctx) - - k.VFS().MustRegisterFilesystemType(Name, &FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserList: true, - AllowUserMount: true, - }) - - mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.MountOptions{}) - if err != nil { - t.Fatalf("NewMountNamespace(): %v", err) - } - - return testutil.NewSystem(ctx, t, k.VFS(), mntns) -} - -// newTestConnection creates a fuse connection that the sentry can communicate with -// and the FD for the server to communicate with. -func newTestConnection(system *testutil.System, k *kernel.Kernel, maxActiveRequests uint64) (*connection, *vfs.FileDescription, error) { - fuseDev := &DeviceFD{} - - vd := system.VFS.NewAnonVirtualDentry("fuse") - defer vd.DecRef(system.Ctx) - if err := fuseDev.vfsfd.Init(fuseDev, linux.O_RDWR, vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{}); err != nil { - return nil, nil, err - } - - fsopts := filesystemOptions{ - maxActiveRequests: maxActiveRequests, - } - fs, err := newFUSEFilesystem(system.Ctx, system.VFS, &FilesystemType{}, fuseDev, 0, &fsopts) - if err != nil { - return nil, nil, err - } - return fs.conn, &fuseDev.vfsfd, nil -} - -type testPayload struct { - marshal.StubMarshallable - data uint32 -} - -// SizeBytes implements marshal.Marshallable.SizeBytes. -func (t *testPayload) SizeBytes() int { - return 4 -} - -// MarshalBytes implements marshal.Marshallable.MarshalBytes. -func (t *testPayload) MarshalBytes(dst []byte) { - hostarch.ByteOrder.PutUint32(dst[:4], t.data) -} - -// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. -func (t *testPayload) UnmarshalBytes(src []byte) { - *t = testPayload{data: hostarch.ByteOrder.Uint32(src[:4])} -} - -// Packed implements marshal.Marshallable.Packed. -func (t *testPayload) Packed() bool { - return true -} - -// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. -func (t *testPayload) MarshalUnsafe(dst []byte) { - t.MarshalBytes(dst) -} - -// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. -func (t *testPayload) UnmarshalUnsafe(src []byte) { - t.UnmarshalBytes(src) -} - -// CopyOutN implements marshal.Marshallable.CopyOutN. -func (t *testPayload) CopyOutN(task marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { - panic("not implemented") -} - -// CopyOut implements marshal.Marshallable.CopyOut. -func (t *testPayload) CopyOut(task marshal.CopyContext, addr hostarch.Addr) (int, error) { - panic("not implemented") -} - -// CopyIn implements marshal.Marshallable.CopyIn. -func (t *testPayload) CopyIn(task marshal.CopyContext, addr hostarch.Addr) (int, error) { - panic("not implemented") -} - -// WriteTo implements io.WriterTo.WriteTo. -func (t *testPayload) WriteTo(w io.Writer) (int64, error) { - panic("not implemented") -} diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD deleted file mode 100644 index 752060044..000000000 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ /dev/null @@ -1,99 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "dentry_list", - out = "dentry_list.go", - package = "gofer", - prefix = "dentry", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*dentry", - "Linker": "*dentry", - }, -) - -go_template_instance( - name = "fstree", - out = "fstree.go", - package = "gofer", - prefix = "generic", - template = "//pkg/sentry/vfs/genericfstree:generic_fstree", - types = { - "Dentry": "dentry", - }, -) - -go_library( - name = "gofer", - srcs = [ - "dentry_list.go", - "directory.go", - "filesystem.go", - "fstree.go", - "gofer.go", - "handle.go", - "host_named_pipe.go", - "p9file.go", - "regular_file.go", - "revalidate.go", - "save_restore.go", - "socket.go", - "special_file.go", - "symlink.go", - "time.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/fspath", - "//pkg/hostarch", - "//pkg/log", - "//pkg/metric", - "//pkg/p9", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fsimpl/host", - "//pkg/sentry/fsmetric", - "//pkg/sentry/hostfd", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/unet", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "gofer_test", - srcs = ["gofer_test.go"], - library = ":gofer", - deps = [ - "//pkg/p9", - "//pkg/sentry/contexttest", - "//pkg/sentry/pgalloc", - ], -) diff --git a/pkg/sentry/fsimpl/gofer/dentry_list.go b/pkg/sentry/fsimpl/gofer/dentry_list.go new file mode 100644 index 000000000..2e43b8e02 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/dentry_list.go @@ -0,0 +1,221 @@ +package gofer + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type dentryElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (dentryElementMapper) linkerFor(elem *dentry) *dentry { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type dentryList struct { + head *dentry + tail *dentry +} + +// Reset resets list l to the empty state. +func (l *dentryList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *dentryList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *dentryList) Front() *dentry { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *dentryList) Back() *dentry { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *dentryList) Len() (count int) { + for e := l.Front(); e != nil; e = (dentryElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *dentryList) PushFront(e *dentry) { + linker := dentryElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + dentryElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *dentryList) PushBack(e *dentry) { + linker := dentryElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + dentryElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *dentryList) PushBackList(m *dentryList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + dentryElementMapper{}.linkerFor(l.tail).SetNext(m.head) + dentryElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *dentryList) InsertAfter(b, e *dentry) { + bLinker := dentryElementMapper{}.linkerFor(b) + eLinker := dentryElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + dentryElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *dentryList) InsertBefore(a, e *dentry) { + aLinker := dentryElementMapper{}.linkerFor(a) + eLinker := dentryElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + dentryElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *dentryList) Remove(e *dentry) { + linker := dentryElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + dentryElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + dentryElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type dentryEntry struct { + next *dentry + prev *dentry +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *dentryEntry) Next() *dentry { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *dentryEntry) Prev() *dentry { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *dentryEntry) SetNext(elem *dentry) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *dentryEntry) SetPrev(elem *dentry) { + e.prev = elem +} diff --git a/pkg/sentry/fsimpl/gofer/fstree.go b/pkg/sentry/fsimpl/gofer/fstree.go new file mode 100644 index 000000000..6e43d4a4b --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/fstree.go @@ -0,0 +1,55 @@ +package gofer + +import ( + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// IsAncestorDentry returns true if d is an ancestor of d2; that is, d is +// either d2's parent or an ancestor of d2's parent. +func genericIsAncestorDentry(d, d2 *dentry) bool { + for d2 != nil { + if d2.parent == d { + return true + } + if d2.parent == d2 { + return false + } + d2 = d2.parent + } + return false +} + +// ParentOrSelf returns d.parent. If d.parent is nil, ParentOrSelf returns d. +func genericParentOrSelf(d *dentry) *dentry { + if d.parent != nil { + return d.parent + } + return d +} + +// PrependPath is a generic implementation of FilesystemImpl.PrependPath(). +func genericPrependPath(vfsroot vfs.VirtualDentry, mnt *vfs.Mount, d *dentry, b *fspath.Builder) error { + for { + if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() { + return vfs.PrependPathAtVFSRootError{} + } + if mnt != nil && &d.vfsd == mnt.Root() { + return nil + } + if d.parent == nil { + return vfs.PrependPathAtNonMountRootError{} + } + b.PrependComponent(d.name) + d = d.parent + } +} + +// DebugPathname returns a pathname to d relative to its filesystem root. +// DebugPathname does not correspond to any Linux function; it's used to +// generate dentry pathnames for debugging. +func genericDebugPathname(d *dentry) string { + var b fspath.Builder + _ = genericPrependPath(vfs.VirtualDentry{}, nil, d, &b) + return b.String() +} diff --git a/pkg/sentry/fsimpl/gofer/gofer_state_autogen.go b/pkg/sentry/fsimpl/gofer/gofer_state_autogen.go new file mode 100644 index 000000000..11635f249 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/gofer_state_autogen.go @@ -0,0 +1,608 @@ +// automatically generated by stateify. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (l *dentryList) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.dentryList" +} + +func (l *dentryList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *dentryList) beforeSave() {} + +// +checklocksignore +func (l *dentryList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *dentryList) afterLoad() {} + +// +checklocksignore +func (l *dentryList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *dentryEntry) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.dentryEntry" +} + +func (e *dentryEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *dentryEntry) beforeSave() {} + +// +checklocksignore +func (e *dentryEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *dentryEntry) afterLoad() {} + +// +checklocksignore +func (e *dentryEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (fd *directoryFD) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.directoryFD" +} + +func (fd *directoryFD) StateFields() []string { + return []string{ + "fileDescription", + "DirectoryFileDescriptionDefaultImpl", + "off", + "dirents", + } +} + +func (fd *directoryFD) beforeSave() {} + +// +checklocksignore +func (fd *directoryFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.fileDescription) + stateSinkObject.Save(1, &fd.DirectoryFileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.off) + stateSinkObject.Save(3, &fd.dirents) +} + +func (fd *directoryFD) afterLoad() {} + +// +checklocksignore +func (fd *directoryFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.fileDescription) + stateSourceObject.Load(1, &fd.DirectoryFileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.off) + stateSourceObject.Load(3, &fd.dirents) +} + +func (fstype *FilesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.FilesystemType" +} + +func (fstype *FilesystemType) StateFields() []string { + return []string{} +} + +func (fstype *FilesystemType) beforeSave() {} + +// +checklocksignore +func (fstype *FilesystemType) StateSave(stateSinkObject state.Sink) { + fstype.beforeSave() +} + +func (fstype *FilesystemType) afterLoad() {} + +// +checklocksignore +func (fstype *FilesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "vfsfs", + "mfp", + "opts", + "iopts", + "clock", + "devMinor", + "root", + "cachedDentries", + "cachedDentriesLen", + "syncableDentries", + "specialFileFDs", + "lastIno", + "savedDentryRW", + "released", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.vfsfs) + stateSinkObject.Save(1, &fs.mfp) + stateSinkObject.Save(2, &fs.opts) + stateSinkObject.Save(3, &fs.iopts) + stateSinkObject.Save(4, &fs.clock) + stateSinkObject.Save(5, &fs.devMinor) + stateSinkObject.Save(6, &fs.root) + stateSinkObject.Save(7, &fs.cachedDentries) + stateSinkObject.Save(8, &fs.cachedDentriesLen) + stateSinkObject.Save(9, &fs.syncableDentries) + stateSinkObject.Save(10, &fs.specialFileFDs) + stateSinkObject.Save(11, &fs.lastIno) + stateSinkObject.Save(12, &fs.savedDentryRW) + stateSinkObject.Save(13, &fs.released) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.vfsfs) + stateSourceObject.Load(1, &fs.mfp) + stateSourceObject.Load(2, &fs.opts) + stateSourceObject.Load(3, &fs.iopts) + stateSourceObject.Load(4, &fs.clock) + stateSourceObject.Load(5, &fs.devMinor) + stateSourceObject.Load(6, &fs.root) + stateSourceObject.Load(7, &fs.cachedDentries) + stateSourceObject.Load(8, &fs.cachedDentriesLen) + stateSourceObject.Load(9, &fs.syncableDentries) + stateSourceObject.Load(10, &fs.specialFileFDs) + stateSourceObject.Load(11, &fs.lastIno) + stateSourceObject.Load(12, &fs.savedDentryRW) + stateSourceObject.Load(13, &fs.released) +} + +func (f *filesystemOptions) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.filesystemOptions" +} + +func (f *filesystemOptions) StateFields() []string { + return []string{ + "fd", + "aname", + "interop", + "dfltuid", + "dfltgid", + "msize", + "version", + "maxCachedDentries", + "forcePageCache", + "limitHostFDTranslation", + "overlayfsStaleRead", + "regularFilesUseSpecialFileFD", + } +} + +func (f *filesystemOptions) beforeSave() {} + +// +checklocksignore +func (f *filesystemOptions) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.fd) + stateSinkObject.Save(1, &f.aname) + stateSinkObject.Save(2, &f.interop) + stateSinkObject.Save(3, &f.dfltuid) + stateSinkObject.Save(4, &f.dfltgid) + stateSinkObject.Save(5, &f.msize) + stateSinkObject.Save(6, &f.version) + stateSinkObject.Save(7, &f.maxCachedDentries) + stateSinkObject.Save(8, &f.forcePageCache) + stateSinkObject.Save(9, &f.limitHostFDTranslation) + stateSinkObject.Save(10, &f.overlayfsStaleRead) + stateSinkObject.Save(11, &f.regularFilesUseSpecialFileFD) +} + +func (f *filesystemOptions) afterLoad() {} + +// +checklocksignore +func (f *filesystemOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.fd) + stateSourceObject.Load(1, &f.aname) + stateSourceObject.Load(2, &f.interop) + stateSourceObject.Load(3, &f.dfltuid) + stateSourceObject.Load(4, &f.dfltgid) + stateSourceObject.Load(5, &f.msize) + stateSourceObject.Load(6, &f.version) + stateSourceObject.Load(7, &f.maxCachedDentries) + stateSourceObject.Load(8, &f.forcePageCache) + stateSourceObject.Load(9, &f.limitHostFDTranslation) + stateSourceObject.Load(10, &f.overlayfsStaleRead) + stateSourceObject.Load(11, &f.regularFilesUseSpecialFileFD) +} + +func (i *InteropMode) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.InteropMode" +} + +func (i *InteropMode) StateFields() []string { + return nil +} + +func (i *InternalFilesystemOptions) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.InternalFilesystemOptions" +} + +func (i *InternalFilesystemOptions) StateFields() []string { + return []string{ + "UniqueID", + "LeakConnection", + "OpenSocketsByConnecting", + } +} + +func (i *InternalFilesystemOptions) beforeSave() {} + +// +checklocksignore +func (i *InternalFilesystemOptions) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.UniqueID) + stateSinkObject.Save(1, &i.LeakConnection) + stateSinkObject.Save(2, &i.OpenSocketsByConnecting) +} + +func (i *InternalFilesystemOptions) afterLoad() {} + +// +checklocksignore +func (i *InternalFilesystemOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.UniqueID) + stateSourceObject.Load(1, &i.LeakConnection) + stateSourceObject.Load(2, &i.OpenSocketsByConnecting) +} + +func (d *dentry) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.dentry" +} + +func (d *dentry) StateFields() []string { + return []string{ + "vfsd", + "refs", + "fs", + "parent", + "name", + "qidPath", + "deleted", + "cached", + "dentryEntry", + "children", + "syntheticChildren", + "dirents", + "ino", + "mode", + "uid", + "gid", + "blockSize", + "atime", + "mtime", + "ctime", + "btime", + "size", + "atimeDirty", + "mtimeDirty", + "nlink", + "mappings", + "cache", + "dirty", + "pf", + "haveTarget", + "target", + "endpoint", + "pipe", + "locks", + "watches", + } +} + +// +checklocksignore +func (d *dentry) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.vfsd) + stateSinkObject.Save(1, &d.refs) + stateSinkObject.Save(2, &d.fs) + stateSinkObject.Save(3, &d.parent) + stateSinkObject.Save(4, &d.name) + stateSinkObject.Save(5, &d.qidPath) + stateSinkObject.Save(6, &d.deleted) + stateSinkObject.Save(7, &d.cached) + stateSinkObject.Save(8, &d.dentryEntry) + stateSinkObject.Save(9, &d.children) + stateSinkObject.Save(10, &d.syntheticChildren) + stateSinkObject.Save(11, &d.dirents) + stateSinkObject.Save(12, &d.ino) + stateSinkObject.Save(13, &d.mode) + stateSinkObject.Save(14, &d.uid) + stateSinkObject.Save(15, &d.gid) + stateSinkObject.Save(16, &d.blockSize) + stateSinkObject.Save(17, &d.atime) + stateSinkObject.Save(18, &d.mtime) + stateSinkObject.Save(19, &d.ctime) + stateSinkObject.Save(20, &d.btime) + stateSinkObject.Save(21, &d.size) + stateSinkObject.Save(22, &d.atimeDirty) + stateSinkObject.Save(23, &d.mtimeDirty) + stateSinkObject.Save(24, &d.nlink) + stateSinkObject.Save(25, &d.mappings) + stateSinkObject.Save(26, &d.cache) + stateSinkObject.Save(27, &d.dirty) + stateSinkObject.Save(28, &d.pf) + stateSinkObject.Save(29, &d.haveTarget) + stateSinkObject.Save(30, &d.target) + stateSinkObject.Save(31, &d.endpoint) + stateSinkObject.Save(32, &d.pipe) + stateSinkObject.Save(33, &d.locks) + stateSinkObject.Save(34, &d.watches) +} + +// +checklocksignore +func (d *dentry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.vfsd) + stateSourceObject.Load(1, &d.refs) + stateSourceObject.Load(2, &d.fs) + stateSourceObject.Load(3, &d.parent) + stateSourceObject.Load(4, &d.name) + stateSourceObject.Load(5, &d.qidPath) + stateSourceObject.Load(6, &d.deleted) + stateSourceObject.Load(7, &d.cached) + stateSourceObject.Load(8, &d.dentryEntry) + stateSourceObject.Load(9, &d.children) + stateSourceObject.Load(10, &d.syntheticChildren) + stateSourceObject.Load(11, &d.dirents) + stateSourceObject.Load(12, &d.ino) + stateSourceObject.Load(13, &d.mode) + stateSourceObject.Load(14, &d.uid) + stateSourceObject.Load(15, &d.gid) + stateSourceObject.Load(16, &d.blockSize) + stateSourceObject.Load(17, &d.atime) + stateSourceObject.Load(18, &d.mtime) + stateSourceObject.Load(19, &d.ctime) + stateSourceObject.Load(20, &d.btime) + stateSourceObject.Load(21, &d.size) + stateSourceObject.Load(22, &d.atimeDirty) + stateSourceObject.Load(23, &d.mtimeDirty) + stateSourceObject.Load(24, &d.nlink) + stateSourceObject.Load(25, &d.mappings) + stateSourceObject.Load(26, &d.cache) + stateSourceObject.Load(27, &d.dirty) + stateSourceObject.Load(28, &d.pf) + stateSourceObject.Load(29, &d.haveTarget) + stateSourceObject.Load(30, &d.target) + stateSourceObject.Load(31, &d.endpoint) + stateSourceObject.Load(32, &d.pipe) + stateSourceObject.Load(33, &d.locks) + stateSourceObject.Load(34, &d.watches) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (fd *fileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.fileDescription" +} + +func (fd *fileDescription) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "LockFD", + } +} + +func (fd *fileDescription) beforeSave() {} + +// +checklocksignore +func (fd *fileDescription) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.LockFD) +} + +func (fd *fileDescription) afterLoad() {} + +// +checklocksignore +func (fd *fileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.LockFD) +} + +func (fd *regularFileFD) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.regularFileFD" +} + +func (fd *regularFileFD) StateFields() []string { + return []string{ + "fileDescription", + "off", + } +} + +func (fd *regularFileFD) beforeSave() {} + +// +checklocksignore +func (fd *regularFileFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.fileDescription) + stateSinkObject.Save(1, &fd.off) +} + +func (fd *regularFileFD) afterLoad() {} + +// +checklocksignore +func (fd *regularFileFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.fileDescription) + stateSourceObject.Load(1, &fd.off) +} + +func (d *dentryPlatformFile) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.dentryPlatformFile" +} + +func (d *dentryPlatformFile) StateFields() []string { + return []string{ + "dentry", + "fdRefs", + "hostFileMapper", + } +} + +func (d *dentryPlatformFile) beforeSave() {} + +// +checklocksignore +func (d *dentryPlatformFile) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.dentry) + stateSinkObject.Save(1, &d.fdRefs) + stateSinkObject.Save(2, &d.hostFileMapper) +} + +// +checklocksignore +func (d *dentryPlatformFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.dentry) + stateSourceObject.Load(1, &d.fdRefs) + stateSourceObject.Load(2, &d.hostFileMapper) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (s *savedDentryRW) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.savedDentryRW" +} + +func (s *savedDentryRW) StateFields() []string { + return []string{ + "read", + "write", + } +} + +func (s *savedDentryRW) beforeSave() {} + +// +checklocksignore +func (s *savedDentryRW) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.read) + stateSinkObject.Save(1, &s.write) +} + +func (s *savedDentryRW) afterLoad() {} + +// +checklocksignore +func (s *savedDentryRW) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.read) + stateSourceObject.Load(1, &s.write) +} + +func (e *endpoint) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.endpoint" +} + +func (e *endpoint) StateFields() []string { + return []string{ + "dentry", + "path", + } +} + +func (e *endpoint) beforeSave() {} + +// +checklocksignore +func (e *endpoint) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.dentry) + stateSinkObject.Save(1, &e.path) +} + +func (e *endpoint) afterLoad() {} + +// +checklocksignore +func (e *endpoint) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.dentry) + stateSourceObject.Load(1, &e.path) +} + +func (fd *specialFileFD) StateTypeName() string { + return "pkg/sentry/fsimpl/gofer.specialFileFD" +} + +func (fd *specialFileFD) StateFields() []string { + return []string{ + "fileDescription", + "isRegularFile", + "seekable", + "queue", + "off", + "haveBuf", + "buf", + } +} + +func (fd *specialFileFD) beforeSave() {} + +// +checklocksignore +func (fd *specialFileFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.fileDescription) + stateSinkObject.Save(1, &fd.isRegularFile) + stateSinkObject.Save(2, &fd.seekable) + stateSinkObject.Save(3, &fd.queue) + stateSinkObject.Save(4, &fd.off) + stateSinkObject.Save(5, &fd.haveBuf) + stateSinkObject.Save(6, &fd.buf) +} + +// +checklocksignore +func (fd *specialFileFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.fileDescription) + stateSourceObject.Load(1, &fd.isRegularFile) + stateSourceObject.Load(2, &fd.seekable) + stateSourceObject.Load(3, &fd.queue) + stateSourceObject.Load(4, &fd.off) + stateSourceObject.Load(5, &fd.haveBuf) + stateSourceObject.Load(6, &fd.buf) + stateSourceObject.AfterLoad(fd.afterLoad) +} + +func init() { + state.Register((*dentryList)(nil)) + state.Register((*dentryEntry)(nil)) + state.Register((*directoryFD)(nil)) + state.Register((*FilesystemType)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*filesystemOptions)(nil)) + state.Register((*InteropMode)(nil)) + state.Register((*InternalFilesystemOptions)(nil)) + state.Register((*dentry)(nil)) + state.Register((*fileDescription)(nil)) + state.Register((*regularFileFD)(nil)) + state.Register((*dentryPlatformFile)(nil)) + state.Register((*savedDentryRW)(nil)) + state.Register((*endpoint)(nil)) + state.Register((*specialFileFD)(nil)) +} diff --git a/pkg/sentry/fsimpl/gofer/gofer_test.go b/pkg/sentry/fsimpl/gofer/gofer_test.go deleted file mode 100644 index 806392d50..000000000 --- a/pkg/sentry/fsimpl/gofer/gofer_test.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gofer - -import ( - "sync/atomic" - "testing" - - "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" -) - -func TestDestroyIdempotent(t *testing.T) { - ctx := contexttest.Context(t) - fs := filesystem{ - mfp: pgalloc.MemoryFileProviderFromContext(ctx), - opts: filesystemOptions{ - // Test relies on no dentry being held in the cache. - maxCachedDentries: 0, - }, - syncableDentries: make(map[*dentry]struct{}), - inoByQIDPath: make(map[uint64]uint64), - } - - attr := &p9.Attr{ - Mode: p9.ModeRegular, - } - mask := p9.AttrMask{ - Mode: true, - Size: true, - } - parent, err := fs.newDentry(ctx, p9file{}, p9.QID{}, mask, attr) - if err != nil { - t.Fatalf("fs.newDentry(): %v", err) - } - - child, err := fs.newDentry(ctx, p9file{}, p9.QID{}, mask, attr) - if err != nil { - t.Fatalf("fs.newDentry(): %v", err) - } - parent.cacheNewChildLocked(child, "child") - - fs.renameMu.Lock() - defer fs.renameMu.Unlock() - child.checkCachingLocked(ctx, true /* renameMuWriteLocked */) - if got := atomic.LoadInt64(&child.refs); got != -1 { - t.Fatalf("child.refs=%d, want: -1", got) - } - // Parent will also be destroyed when child reference is removed. - if got := atomic.LoadInt64(&parent.refs); got != -1 { - t.Fatalf("parent.refs=%d, want: -1", got) - } - child.checkCachingLocked(ctx, true /* renameMuWriteLocked */) - child.checkCachingLocked(ctx, true /* renameMuWriteLocked */) -} diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD deleted file mode 100644 index 476545d00..000000000 --- a/pkg/sentry/fsimpl/host/BUILD +++ /dev/null @@ -1,80 +0,0 @@ -load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "inode_refs", - out = "inode_refs.go", - package = "host", - prefix = "inode", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "inode", - }, -) - -go_template_instance( - name = "connected_endpoint_refs", - out = "connected_endpoint_refs.go", - package = "host", - prefix = "ConnectedEndpoint", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "ConnectedEndpoint", - }, -) - -go_library( - name = "host", - srcs = [ - "connected_endpoint_refs.go", - "control.go", - "host.go", - "inode_refs.go", - "ioctl_unsafe.go", - "save_restore.go", - "socket.go", - "socket_iovec.go", - "socket_unsafe.go", - "tty.go", - "util.go", - "util_unsafe.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fdnotifier", - "//pkg/fspath", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal/primitive", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/hostfd", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/unimpl", - "//pkg/sentry/uniqueid", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/unet", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go new file mode 100644 index 000000000..c0a87f656 --- /dev/null +++ b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go @@ -0,0 +1,140 @@ +package host + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const ConnectedEndpointenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var ConnectedEndpointobj *ConnectedEndpoint + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type ConnectedEndpointRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *ConnectedEndpointRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *ConnectedEndpointRefs) RefType() string { + return fmt.Sprintf("%T", ConnectedEndpointobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *ConnectedEndpointRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *ConnectedEndpointRefs) LogRefs() bool { + return ConnectedEndpointenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *ConnectedEndpointRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *ConnectedEndpointRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if ConnectedEndpointenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *ConnectedEndpointRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if ConnectedEndpointenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *ConnectedEndpointRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if ConnectedEndpointenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *ConnectedEndpointRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/host/host_state_autogen.go b/pkg/sentry/fsimpl/host/host_state_autogen.go new file mode 100644 index 000000000..607474165 --- /dev/null +++ b/pkg/sentry/fsimpl/host/host_state_autogen.go @@ -0,0 +1,327 @@ +// automatically generated by stateify. + +package host + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *ConnectedEndpointRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/host.ConnectedEndpointRefs" +} + +func (r *ConnectedEndpointRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *ConnectedEndpointRefs) beforeSave() {} + +// +checklocksignore +func (r *ConnectedEndpointRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *ConnectedEndpointRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (v *virtualOwner) StateTypeName() string { + return "pkg/sentry/fsimpl/host.virtualOwner" +} + +func (v *virtualOwner) StateFields() []string { + return []string{ + "enabled", + "uid", + "gid", + "mode", + } +} + +func (v *virtualOwner) beforeSave() {} + +// +checklocksignore +func (v *virtualOwner) StateSave(stateSinkObject state.Sink) { + v.beforeSave() + stateSinkObject.Save(0, &v.enabled) + stateSinkObject.Save(1, &v.uid) + stateSinkObject.Save(2, &v.gid) + stateSinkObject.Save(3, &v.mode) +} + +func (v *virtualOwner) afterLoad() {} + +// +checklocksignore +func (v *virtualOwner) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &v.enabled) + stateSourceObject.Load(1, &v.uid) + stateSourceObject.Load(2, &v.gid) + stateSourceObject.Load(3, &v.mode) +} + +func (i *inode) StateTypeName() string { + return "pkg/sentry/fsimpl/host.inode" +} + +func (i *inode) StateFields() []string { + return []string{ + "InodeNoStatFS", + "InodeNotDirectory", + "InodeNotSymlink", + "CachedMappable", + "InodeTemporary", + "locks", + "inodeRefs", + "hostFD", + "ino", + "ftype", + "mayBlock", + "seekable", + "isTTY", + "savable", + "queue", + "virtualOwner", + "haveBuf", + "buf", + } +} + +// +checklocksignore +func (i *inode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeNoStatFS) + stateSinkObject.Save(1, &i.InodeNotDirectory) + stateSinkObject.Save(2, &i.InodeNotSymlink) + stateSinkObject.Save(3, &i.CachedMappable) + stateSinkObject.Save(4, &i.InodeTemporary) + stateSinkObject.Save(5, &i.locks) + stateSinkObject.Save(6, &i.inodeRefs) + stateSinkObject.Save(7, &i.hostFD) + stateSinkObject.Save(8, &i.ino) + stateSinkObject.Save(9, &i.ftype) + stateSinkObject.Save(10, &i.mayBlock) + stateSinkObject.Save(11, &i.seekable) + stateSinkObject.Save(12, &i.isTTY) + stateSinkObject.Save(13, &i.savable) + stateSinkObject.Save(14, &i.queue) + stateSinkObject.Save(15, &i.virtualOwner) + stateSinkObject.Save(16, &i.haveBuf) + stateSinkObject.Save(17, &i.buf) +} + +// +checklocksignore +func (i *inode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeNoStatFS) + stateSourceObject.Load(1, &i.InodeNotDirectory) + stateSourceObject.Load(2, &i.InodeNotSymlink) + stateSourceObject.Load(3, &i.CachedMappable) + stateSourceObject.Load(4, &i.InodeTemporary) + stateSourceObject.Load(5, &i.locks) + stateSourceObject.Load(6, &i.inodeRefs) + stateSourceObject.Load(7, &i.hostFD) + stateSourceObject.Load(8, &i.ino) + stateSourceObject.Load(9, &i.ftype) + stateSourceObject.Load(10, &i.mayBlock) + stateSourceObject.Load(11, &i.seekable) + stateSourceObject.Load(12, &i.isTTY) + stateSourceObject.Load(13, &i.savable) + stateSourceObject.Load(14, &i.queue) + stateSourceObject.Load(15, &i.virtualOwner) + stateSourceObject.Load(16, &i.haveBuf) + stateSourceObject.Load(17, &i.buf) + stateSourceObject.AfterLoad(i.afterLoad) +} + +func (f *filesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/host.filesystemType" +} + +func (f *filesystemType) StateFields() []string { + return []string{} +} + +func (f *filesystemType) beforeSave() {} + +// +checklocksignore +func (f *filesystemType) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystemType) afterLoad() {} + +// +checklocksignore +func (f *filesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/host.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "Filesystem", + "devMinor", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.Filesystem) + stateSinkObject.Save(1, &fs.devMinor) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.Filesystem) + stateSourceObject.Load(1, &fs.devMinor) +} + +func (f *fileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/host.fileDescription" +} + +func (f *fileDescription) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "LockFD", + "inode", + "offset", + } +} + +func (f *fileDescription) beforeSave() {} + +// +checklocksignore +func (f *fileDescription) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.vfsfd) + stateSinkObject.Save(1, &f.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &f.LockFD) + stateSinkObject.Save(3, &f.inode) + stateSinkObject.Save(4, &f.offset) +} + +func (f *fileDescription) afterLoad() {} + +// +checklocksignore +func (f *fileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.vfsfd) + stateSourceObject.Load(1, &f.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &f.LockFD) + stateSourceObject.Load(3, &f.inode) + stateSourceObject.Load(4, &f.offset) +} + +func (r *inodeRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/host.inodeRefs" +} + +func (r *inodeRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *inodeRefs) beforeSave() {} + +// +checklocksignore +func (r *inodeRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *inodeRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (c *ConnectedEndpoint) StateTypeName() string { + return "pkg/sentry/fsimpl/host.ConnectedEndpoint" +} + +func (c *ConnectedEndpoint) StateFields() []string { + return []string{ + "ConnectedEndpointRefs", + "fd", + "addr", + "stype", + } +} + +func (c *ConnectedEndpoint) beforeSave() {} + +// +checklocksignore +func (c *ConnectedEndpoint) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.ConnectedEndpointRefs) + stateSinkObject.Save(1, &c.fd) + stateSinkObject.Save(2, &c.addr) + stateSinkObject.Save(3, &c.stype) +} + +// +checklocksignore +func (c *ConnectedEndpoint) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.ConnectedEndpointRefs) + stateSourceObject.Load(1, &c.fd) + stateSourceObject.Load(2, &c.addr) + stateSourceObject.Load(3, &c.stype) + stateSourceObject.AfterLoad(c.afterLoad) +} + +func (t *TTYFileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/host.TTYFileDescription" +} + +func (t *TTYFileDescription) StateFields() []string { + return []string{ + "fileDescription", + "session", + "fgProcessGroup", + "termios", + } +} + +func (t *TTYFileDescription) beforeSave() {} + +// +checklocksignore +func (t *TTYFileDescription) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.fileDescription) + stateSinkObject.Save(1, &t.session) + stateSinkObject.Save(2, &t.fgProcessGroup) + stateSinkObject.Save(3, &t.termios) +} + +func (t *TTYFileDescription) afterLoad() {} + +// +checklocksignore +func (t *TTYFileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.fileDescription) + stateSourceObject.Load(1, &t.session) + stateSourceObject.Load(2, &t.fgProcessGroup) + stateSourceObject.Load(3, &t.termios) +} + +func init() { + state.Register((*ConnectedEndpointRefs)(nil)) + state.Register((*virtualOwner)(nil)) + state.Register((*inode)(nil)) + state.Register((*filesystemType)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*fileDescription)(nil)) + state.Register((*inodeRefs)(nil)) + state.Register((*ConnectedEndpoint)(nil)) + state.Register((*TTYFileDescription)(nil)) +} diff --git a/pkg/sentry/fsimpl/host/host_unsafe_state_autogen.go b/pkg/sentry/fsimpl/host/host_unsafe_state_autogen.go new file mode 100644 index 000000000..b2d8c661f --- /dev/null +++ b/pkg/sentry/fsimpl/host/host_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package host diff --git a/pkg/sentry/fsimpl/host/inode_refs.go b/pkg/sentry/fsimpl/host/inode_refs.go new file mode 100644 index 000000000..112f39850 --- /dev/null +++ b/pkg/sentry/fsimpl/host/inode_refs.go @@ -0,0 +1,140 @@ +package host + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const inodeenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var inodeobj *inode + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type inodeRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *inodeRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *inodeRefs) RefType() string { + return fmt.Sprintf("%T", inodeobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *inodeRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *inodeRefs) LogRefs() bool { + return inodeenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *inodeRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *inodeRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if inodeenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *inodeRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if inodeenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *inodeRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if inodeenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *inodeRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD deleted file mode 100644 index d53937db6..000000000 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ /dev/null @@ -1,151 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "dentry_list", - out = "dentry_list.go", - package = "kernfs", - prefix = "dentry", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Dentry", - "Linker": "*Dentry", - }, -) - -go_template_instance( - name = "fstree", - out = "fstree.go", - package = "kernfs", - prefix = "generic", - template = "//pkg/sentry/vfs/genericfstree:generic_fstree", - types = { - "Dentry": "Dentry", - }, -) - -go_template_instance( - name = "slot_list", - out = "slot_list.go", - package = "kernfs", - prefix = "slot", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*slot", - "Linker": "*slot", - }, -) - -go_template_instance( - name = "static_directory_refs", - out = "static_directory_refs.go", - package = "kernfs", - prefix = "StaticDirectory", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "StaticDirectory", - }, -) - -go_template_instance( - name = "dir_refs", - out = "dir_refs.go", - package = "kernfs_test", - prefix = "dir", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "dir", - }, -) - -go_template_instance( - name = "readonly_dir_refs", - out = "readonly_dir_refs.go", - package = "kernfs_test", - prefix = "readonlyDir", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "readonlyDir", - }, -) - -go_template_instance( - name = "synthetic_directory_refs", - out = "synthetic_directory_refs.go", - package = "kernfs", - prefix = "syntheticDirectory", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "syntheticDirectory", - }, -) - -go_library( - name = "kernfs", - srcs = [ - "dentry_list.go", - "dynamic_bytes_file.go", - "fd_impl_util.go", - "filesystem.go", - "fstree.go", - "inode_impl_util.go", - "kernfs.go", - "mmap_util.go", - "save_restore.go", - "slot_list.go", - "static_directory_refs.go", - "symlink.go", - "synthetic_directory.go", - "synthetic_directory_refs.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/hostarch", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/lock", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - ], -) - -go_test( - name = "kernfs_test", - size = "small", - srcs = [ - "dir_refs.go", - "kernfs_test.go", - "readonly_dir_refs.go", - ], - deps = [ - ":kernfs", - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/sentry/contexttest", - "//pkg/sentry/fsimpl/testutil", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - "@com_github_google_go_cmp//cmp:go_default_library", - ], -) diff --git a/pkg/sentry/fsimpl/kernfs/dentry_list.go b/pkg/sentry/fsimpl/kernfs/dentry_list.go new file mode 100644 index 000000000..e73cde1f1 --- /dev/null +++ b/pkg/sentry/fsimpl/kernfs/dentry_list.go @@ -0,0 +1,221 @@ +package kernfs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type dentryElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (dentryElementMapper) linkerFor(elem *Dentry) *Dentry { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type dentryList struct { + head *Dentry + tail *Dentry +} + +// Reset resets list l to the empty state. +func (l *dentryList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *dentryList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *dentryList) Front() *Dentry { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *dentryList) Back() *Dentry { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *dentryList) Len() (count int) { + for e := l.Front(); e != nil; e = (dentryElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *dentryList) PushFront(e *Dentry) { + linker := dentryElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + dentryElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *dentryList) PushBack(e *Dentry) { + linker := dentryElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + dentryElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *dentryList) PushBackList(m *dentryList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + dentryElementMapper{}.linkerFor(l.tail).SetNext(m.head) + dentryElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *dentryList) InsertAfter(b, e *Dentry) { + bLinker := dentryElementMapper{}.linkerFor(b) + eLinker := dentryElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + dentryElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *dentryList) InsertBefore(a, e *Dentry) { + aLinker := dentryElementMapper{}.linkerFor(a) + eLinker := dentryElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + dentryElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *dentryList) Remove(e *Dentry) { + linker := dentryElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + dentryElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + dentryElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type dentryEntry struct { + next *Dentry + prev *Dentry +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *dentryEntry) Next() *Dentry { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *dentryEntry) Prev() *Dentry { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *dentryEntry) SetNext(elem *Dentry) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *dentryEntry) SetPrev(elem *Dentry) { + e.prev = elem +} diff --git a/pkg/sentry/fsimpl/kernfs/fstree.go b/pkg/sentry/fsimpl/kernfs/fstree.go new file mode 100644 index 000000000..9dc52dabc --- /dev/null +++ b/pkg/sentry/fsimpl/kernfs/fstree.go @@ -0,0 +1,55 @@ +package kernfs + +import ( + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// IsAncestorDentry returns true if d is an ancestor of d2; that is, d is +// either d2's parent or an ancestor of d2's parent. +func genericIsAncestorDentry(d, d2 *Dentry) bool { + for d2 != nil { + if d2.parent == d { + return true + } + if d2.parent == d2 { + return false + } + d2 = d2.parent + } + return false +} + +// ParentOrSelf returns d.parent. If d.parent is nil, ParentOrSelf returns d. +func genericParentOrSelf(d *Dentry) *Dentry { + if d.parent != nil { + return d.parent + } + return d +} + +// PrependPath is a generic implementation of FilesystemImpl.PrependPath(). +func genericPrependPath(vfsroot vfs.VirtualDentry, mnt *vfs.Mount, d *Dentry, b *fspath.Builder) error { + for { + if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() { + return vfs.PrependPathAtVFSRootError{} + } + if mnt != nil && &d.vfsd == mnt.Root() { + return nil + } + if d.parent == nil { + return vfs.PrependPathAtNonMountRootError{} + } + b.PrependComponent(d.name) + d = d.parent + } +} + +// DebugPathname returns a pathname to d relative to its filesystem root. +// DebugPathname does not correspond to any Linux function; it's used to +// generate dentry pathnames for debugging. +func genericDebugPathname(d *Dentry) string { + var b fspath.Builder + _ = genericPrependPath(vfs.VirtualDentry{}, nil, d, &b) + return b.String() +} diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go new file mode 100644 index 000000000..f8add23f8 --- /dev/null +++ b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go @@ -0,0 +1,965 @@ +// automatically generated by stateify. + +package kernfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (l *dentryList) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.dentryList" +} + +func (l *dentryList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *dentryList) beforeSave() {} + +// +checklocksignore +func (l *dentryList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *dentryList) afterLoad() {} + +// +checklocksignore +func (l *dentryList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *dentryEntry) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.dentryEntry" +} + +func (e *dentryEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *dentryEntry) beforeSave() {} + +// +checklocksignore +func (e *dentryEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *dentryEntry) afterLoad() {} + +// +checklocksignore +func (e *dentryEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (f *DynamicBytesFile) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.DynamicBytesFile" +} + +func (f *DynamicBytesFile) StateFields() []string { + return []string{ + "InodeAttrs", + "InodeNoStatFS", + "InodeNoopRefCount", + "InodeNotDirectory", + "InodeNotSymlink", + "locks", + "data", + } +} + +func (f *DynamicBytesFile) beforeSave() {} + +// +checklocksignore +func (f *DynamicBytesFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.InodeAttrs) + stateSinkObject.Save(1, &f.InodeNoStatFS) + stateSinkObject.Save(2, &f.InodeNoopRefCount) + stateSinkObject.Save(3, &f.InodeNotDirectory) + stateSinkObject.Save(4, &f.InodeNotSymlink) + stateSinkObject.Save(5, &f.locks) + stateSinkObject.Save(6, &f.data) +} + +func (f *DynamicBytesFile) afterLoad() {} + +// +checklocksignore +func (f *DynamicBytesFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.InodeAttrs) + stateSourceObject.Load(1, &f.InodeNoStatFS) + stateSourceObject.Load(2, &f.InodeNoopRefCount) + stateSourceObject.Load(3, &f.InodeNotDirectory) + stateSourceObject.Load(4, &f.InodeNotSymlink) + stateSourceObject.Load(5, &f.locks) + stateSourceObject.Load(6, &f.data) +} + +func (fd *DynamicBytesFD) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.DynamicBytesFD" +} + +func (fd *DynamicBytesFD) StateFields() []string { + return []string{ + "FileDescriptionDefaultImpl", + "DynamicBytesFileDescriptionImpl", + "LockFD", + "vfsfd", + "inode", + } +} + +func (fd *DynamicBytesFD) beforeSave() {} + +// +checklocksignore +func (fd *DynamicBytesFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(1, &fd.DynamicBytesFileDescriptionImpl) + stateSinkObject.Save(2, &fd.LockFD) + stateSinkObject.Save(3, &fd.vfsfd) + stateSinkObject.Save(4, &fd.inode) +} + +func (fd *DynamicBytesFD) afterLoad() {} + +// +checklocksignore +func (fd *DynamicBytesFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(1, &fd.DynamicBytesFileDescriptionImpl) + stateSourceObject.Load(2, &fd.LockFD) + stateSourceObject.Load(3, &fd.vfsfd) + stateSourceObject.Load(4, &fd.inode) +} + +func (s *SeekEndConfig) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.SeekEndConfig" +} + +func (s *SeekEndConfig) StateFields() []string { + return nil +} + +func (g *GenericDirectoryFDOptions) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.GenericDirectoryFDOptions" +} + +func (g *GenericDirectoryFDOptions) StateFields() []string { + return []string{ + "SeekEnd", + } +} + +func (g *GenericDirectoryFDOptions) beforeSave() {} + +// +checklocksignore +func (g *GenericDirectoryFDOptions) StateSave(stateSinkObject state.Sink) { + g.beforeSave() + stateSinkObject.Save(0, &g.SeekEnd) +} + +func (g *GenericDirectoryFDOptions) afterLoad() {} + +// +checklocksignore +func (g *GenericDirectoryFDOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &g.SeekEnd) +} + +func (fd *GenericDirectoryFD) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.GenericDirectoryFD" +} + +func (fd *GenericDirectoryFD) StateFields() []string { + return []string{ + "FileDescriptionDefaultImpl", + "DirectoryFileDescriptionDefaultImpl", + "LockFD", + "seekEnd", + "vfsfd", + "children", + "off", + } +} + +func (fd *GenericDirectoryFD) beforeSave() {} + +// +checklocksignore +func (fd *GenericDirectoryFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(1, &fd.DirectoryFileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.LockFD) + stateSinkObject.Save(3, &fd.seekEnd) + stateSinkObject.Save(4, &fd.vfsfd) + stateSinkObject.Save(5, &fd.children) + stateSinkObject.Save(6, &fd.off) +} + +func (fd *GenericDirectoryFD) afterLoad() {} + +// +checklocksignore +func (fd *GenericDirectoryFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(1, &fd.DirectoryFileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.LockFD) + stateSourceObject.Load(3, &fd.seekEnd) + stateSourceObject.Load(4, &fd.vfsfd) + stateSourceObject.Load(5, &fd.children) + stateSourceObject.Load(6, &fd.off) +} + +func (i *InodeNoopRefCount) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeNoopRefCount" +} + +func (i *InodeNoopRefCount) StateFields() []string { + return []string{ + "InodeTemporary", + } +} + +func (i *InodeNoopRefCount) beforeSave() {} + +// +checklocksignore +func (i *InodeNoopRefCount) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeTemporary) +} + +func (i *InodeNoopRefCount) afterLoad() {} + +// +checklocksignore +func (i *InodeNoopRefCount) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeTemporary) +} + +func (i *InodeDirectoryNoNewChildren) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeDirectoryNoNewChildren" +} + +func (i *InodeDirectoryNoNewChildren) StateFields() []string { + return []string{} +} + +func (i *InodeDirectoryNoNewChildren) beforeSave() {} + +// +checklocksignore +func (i *InodeDirectoryNoNewChildren) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *InodeDirectoryNoNewChildren) afterLoad() {} + +// +checklocksignore +func (i *InodeDirectoryNoNewChildren) StateLoad(stateSourceObject state.Source) { +} + +func (i *InodeNotDirectory) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeNotDirectory" +} + +func (i *InodeNotDirectory) StateFields() []string { + return []string{ + "InodeAlwaysValid", + } +} + +func (i *InodeNotDirectory) beforeSave() {} + +// +checklocksignore +func (i *InodeNotDirectory) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeAlwaysValid) +} + +func (i *InodeNotDirectory) afterLoad() {} + +// +checklocksignore +func (i *InodeNotDirectory) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeAlwaysValid) +} + +func (i *InodeNotSymlink) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeNotSymlink" +} + +func (i *InodeNotSymlink) StateFields() []string { + return []string{} +} + +func (i *InodeNotSymlink) beforeSave() {} + +// +checklocksignore +func (i *InodeNotSymlink) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *InodeNotSymlink) afterLoad() {} + +// +checklocksignore +func (i *InodeNotSymlink) StateLoad(stateSourceObject state.Source) { +} + +func (a *InodeAttrs) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeAttrs" +} + +func (a *InodeAttrs) StateFields() []string { + return []string{ + "devMajor", + "devMinor", + "ino", + "mode", + "uid", + "gid", + "nlink", + "blockSize", + "atime", + "mtime", + "ctime", + } +} + +func (a *InodeAttrs) beforeSave() {} + +// +checklocksignore +func (a *InodeAttrs) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.devMajor) + stateSinkObject.Save(1, &a.devMinor) + stateSinkObject.Save(2, &a.ino) + stateSinkObject.Save(3, &a.mode) + stateSinkObject.Save(4, &a.uid) + stateSinkObject.Save(5, &a.gid) + stateSinkObject.Save(6, &a.nlink) + stateSinkObject.Save(7, &a.blockSize) + stateSinkObject.Save(8, &a.atime) + stateSinkObject.Save(9, &a.mtime) + stateSinkObject.Save(10, &a.ctime) +} + +func (a *InodeAttrs) afterLoad() {} + +// +checklocksignore +func (a *InodeAttrs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.devMajor) + stateSourceObject.Load(1, &a.devMinor) + stateSourceObject.Load(2, &a.ino) + stateSourceObject.Load(3, &a.mode) + stateSourceObject.Load(4, &a.uid) + stateSourceObject.Load(5, &a.gid) + stateSourceObject.Load(6, &a.nlink) + stateSourceObject.Load(7, &a.blockSize) + stateSourceObject.Load(8, &a.atime) + stateSourceObject.Load(9, &a.mtime) + stateSourceObject.Load(10, &a.ctime) +} + +func (s *slot) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.slot" +} + +func (s *slot) StateFields() []string { + return []string{ + "name", + "inode", + "static", + "slotEntry", + } +} + +func (s *slot) beforeSave() {} + +// +checklocksignore +func (s *slot) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.name) + stateSinkObject.Save(1, &s.inode) + stateSinkObject.Save(2, &s.static) + stateSinkObject.Save(3, &s.slotEntry) +} + +func (s *slot) afterLoad() {} + +// +checklocksignore +func (s *slot) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.name) + stateSourceObject.Load(1, &s.inode) + stateSourceObject.Load(2, &s.static) + stateSourceObject.Load(3, &s.slotEntry) +} + +func (o *OrderedChildrenOptions) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.OrderedChildrenOptions" +} + +func (o *OrderedChildrenOptions) StateFields() []string { + return []string{ + "Writable", + } +} + +func (o *OrderedChildrenOptions) beforeSave() {} + +// +checklocksignore +func (o *OrderedChildrenOptions) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.Writable) +} + +func (o *OrderedChildrenOptions) afterLoad() {} + +// +checklocksignore +func (o *OrderedChildrenOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.Writable) +} + +func (o *OrderedChildren) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.OrderedChildren" +} + +func (o *OrderedChildren) StateFields() []string { + return []string{ + "writable", + "order", + "set", + } +} + +func (o *OrderedChildren) beforeSave() {} + +// +checklocksignore +func (o *OrderedChildren) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.writable) + stateSinkObject.Save(1, &o.order) + stateSinkObject.Save(2, &o.set) +} + +func (o *OrderedChildren) afterLoad() {} + +// +checklocksignore +func (o *OrderedChildren) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.writable) + stateSourceObject.Load(1, &o.order) + stateSourceObject.Load(2, &o.set) +} + +func (i *InodeSymlink) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeSymlink" +} + +func (i *InodeSymlink) StateFields() []string { + return []string{ + "InodeNotDirectory", + } +} + +func (i *InodeSymlink) beforeSave() {} + +// +checklocksignore +func (i *InodeSymlink) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeNotDirectory) +} + +func (i *InodeSymlink) afterLoad() {} + +// +checklocksignore +func (i *InodeSymlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeNotDirectory) +} + +func (s *StaticDirectory) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.StaticDirectory" +} + +func (s *StaticDirectory) StateFields() []string { + return []string{ + "InodeAlwaysValid", + "InodeAttrs", + "InodeDirectoryNoNewChildren", + "InodeNoStatFS", + "InodeNotSymlink", + "InodeTemporary", + "OrderedChildren", + "StaticDirectoryRefs", + "locks", + "fdOpts", + } +} + +func (s *StaticDirectory) beforeSave() {} + +// +checklocksignore +func (s *StaticDirectory) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeAlwaysValid) + stateSinkObject.Save(1, &s.InodeAttrs) + stateSinkObject.Save(2, &s.InodeDirectoryNoNewChildren) + stateSinkObject.Save(3, &s.InodeNoStatFS) + stateSinkObject.Save(4, &s.InodeNotSymlink) + stateSinkObject.Save(5, &s.InodeTemporary) + stateSinkObject.Save(6, &s.OrderedChildren) + stateSinkObject.Save(7, &s.StaticDirectoryRefs) + stateSinkObject.Save(8, &s.locks) + stateSinkObject.Save(9, &s.fdOpts) +} + +func (s *StaticDirectory) afterLoad() {} + +// +checklocksignore +func (s *StaticDirectory) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeAlwaysValid) + stateSourceObject.Load(1, &s.InodeAttrs) + stateSourceObject.Load(2, &s.InodeDirectoryNoNewChildren) + stateSourceObject.Load(3, &s.InodeNoStatFS) + stateSourceObject.Load(4, &s.InodeNotSymlink) + stateSourceObject.Load(5, &s.InodeTemporary) + stateSourceObject.Load(6, &s.OrderedChildren) + stateSourceObject.Load(7, &s.StaticDirectoryRefs) + stateSourceObject.Load(8, &s.locks) + stateSourceObject.Load(9, &s.fdOpts) +} + +func (i *InodeAlwaysValid) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeAlwaysValid" +} + +func (i *InodeAlwaysValid) StateFields() []string { + return []string{} +} + +func (i *InodeAlwaysValid) beforeSave() {} + +// +checklocksignore +func (i *InodeAlwaysValid) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *InodeAlwaysValid) afterLoad() {} + +// +checklocksignore +func (i *InodeAlwaysValid) StateLoad(stateSourceObject state.Source) { +} + +func (i *InodeTemporary) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeTemporary" +} + +func (i *InodeTemporary) StateFields() []string { + return []string{} +} + +func (i *InodeTemporary) beforeSave() {} + +// +checklocksignore +func (i *InodeTemporary) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *InodeTemporary) afterLoad() {} + +// +checklocksignore +func (i *InodeTemporary) StateLoad(stateSourceObject state.Source) { +} + +func (i *InodeNoStatFS) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeNoStatFS" +} + +func (i *InodeNoStatFS) StateFields() []string { + return []string{} +} + +func (i *InodeNoStatFS) beforeSave() {} + +// +checklocksignore +func (i *InodeNoStatFS) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *InodeNoStatFS) afterLoad() {} + +// +checklocksignore +func (i *InodeNoStatFS) StateLoad(stateSourceObject state.Source) { +} + +func (fs *Filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.Filesystem" +} + +func (fs *Filesystem) StateFields() []string { + return []string{ + "vfsfs", + "deferredDecRefs", + "nextInoMinusOne", + "cachedDentries", + "cachedDentriesLen", + "MaxCachedDentries", + "root", + } +} + +func (fs *Filesystem) beforeSave() {} + +// +checklocksignore +func (fs *Filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.vfsfs) + stateSinkObject.Save(1, &fs.deferredDecRefs) + stateSinkObject.Save(2, &fs.nextInoMinusOne) + stateSinkObject.Save(3, &fs.cachedDentries) + stateSinkObject.Save(4, &fs.cachedDentriesLen) + stateSinkObject.Save(5, &fs.MaxCachedDentries) + stateSinkObject.Save(6, &fs.root) +} + +func (fs *Filesystem) afterLoad() {} + +// +checklocksignore +func (fs *Filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.vfsfs) + stateSourceObject.Load(1, &fs.deferredDecRefs) + stateSourceObject.Load(2, &fs.nextInoMinusOne) + stateSourceObject.Load(3, &fs.cachedDentries) + stateSourceObject.Load(4, &fs.cachedDentriesLen) + stateSourceObject.Load(5, &fs.MaxCachedDentries) + stateSourceObject.Load(6, &fs.root) +} + +func (d *Dentry) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.Dentry" +} + +func (d *Dentry) StateFields() []string { + return []string{ + "vfsd", + "refs", + "fs", + "flags", + "parent", + "name", + "cached", + "dentryEntry", + "children", + "inode", + } +} + +func (d *Dentry) beforeSave() {} + +// +checklocksignore +func (d *Dentry) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.vfsd) + stateSinkObject.Save(1, &d.refs) + stateSinkObject.Save(2, &d.fs) + stateSinkObject.Save(3, &d.flags) + stateSinkObject.Save(4, &d.parent) + stateSinkObject.Save(5, &d.name) + stateSinkObject.Save(6, &d.cached) + stateSinkObject.Save(7, &d.dentryEntry) + stateSinkObject.Save(8, &d.children) + stateSinkObject.Save(9, &d.inode) +} + +// +checklocksignore +func (d *Dentry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.vfsd) + stateSourceObject.Load(1, &d.refs) + stateSourceObject.Load(2, &d.fs) + stateSourceObject.Load(3, &d.flags) + stateSourceObject.Load(4, &d.parent) + stateSourceObject.Load(5, &d.name) + stateSourceObject.Load(6, &d.cached) + stateSourceObject.Load(7, &d.dentryEntry) + stateSourceObject.Load(8, &d.children) + stateSourceObject.Load(9, &d.inode) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (i *inodePlatformFile) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.inodePlatformFile" +} + +func (i *inodePlatformFile) StateFields() []string { + return []string{ + "hostFD", + "fdRefs", + "fileMapper", + } +} + +func (i *inodePlatformFile) beforeSave() {} + +// +checklocksignore +func (i *inodePlatformFile) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.hostFD) + stateSinkObject.Save(1, &i.fdRefs) + stateSinkObject.Save(2, &i.fileMapper) +} + +// +checklocksignore +func (i *inodePlatformFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.hostFD) + stateSourceObject.Load(1, &i.fdRefs) + stateSourceObject.Load(2, &i.fileMapper) + stateSourceObject.AfterLoad(i.afterLoad) +} + +func (i *CachedMappable) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.CachedMappable" +} + +func (i *CachedMappable) StateFields() []string { + return []string{ + "mappings", + "pf", + } +} + +func (i *CachedMappable) beforeSave() {} + +// +checklocksignore +func (i *CachedMappable) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.mappings) + stateSinkObject.Save(1, &i.pf) +} + +func (i *CachedMappable) afterLoad() {} + +// +checklocksignore +func (i *CachedMappable) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.mappings) + stateSourceObject.Load(1, &i.pf) +} + +func (l *slotList) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.slotList" +} + +func (l *slotList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *slotList) beforeSave() {} + +// +checklocksignore +func (l *slotList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *slotList) afterLoad() {} + +// +checklocksignore +func (l *slotList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *slotEntry) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.slotEntry" +} + +func (e *slotEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *slotEntry) beforeSave() {} + +// +checklocksignore +func (e *slotEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *slotEntry) afterLoad() {} + +// +checklocksignore +func (e *slotEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (r *StaticDirectoryRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.StaticDirectoryRefs" +} + +func (r *StaticDirectoryRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *StaticDirectoryRefs) beforeSave() {} + +// +checklocksignore +func (r *StaticDirectoryRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *StaticDirectoryRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (s *StaticSymlink) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.StaticSymlink" +} + +func (s *StaticSymlink) StateFields() []string { + return []string{ + "InodeAttrs", + "InodeNoopRefCount", + "InodeSymlink", + "InodeNoStatFS", + "target", + } +} + +func (s *StaticSymlink) beforeSave() {} + +// +checklocksignore +func (s *StaticSymlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeAttrs) + stateSinkObject.Save(1, &s.InodeNoopRefCount) + stateSinkObject.Save(2, &s.InodeSymlink) + stateSinkObject.Save(3, &s.InodeNoStatFS) + stateSinkObject.Save(4, &s.target) +} + +func (s *StaticSymlink) afterLoad() {} + +// +checklocksignore +func (s *StaticSymlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeAttrs) + stateSourceObject.Load(1, &s.InodeNoopRefCount) + stateSourceObject.Load(2, &s.InodeSymlink) + stateSourceObject.Load(3, &s.InodeNoStatFS) + stateSourceObject.Load(4, &s.target) +} + +func (dir *syntheticDirectory) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.syntheticDirectory" +} + +func (dir *syntheticDirectory) StateFields() []string { + return []string{ + "InodeAlwaysValid", + "InodeAttrs", + "InodeNoStatFS", + "InodeNotSymlink", + "OrderedChildren", + "syntheticDirectoryRefs", + "locks", + } +} + +func (dir *syntheticDirectory) beforeSave() {} + +// +checklocksignore +func (dir *syntheticDirectory) StateSave(stateSinkObject state.Sink) { + dir.beforeSave() + stateSinkObject.Save(0, &dir.InodeAlwaysValid) + stateSinkObject.Save(1, &dir.InodeAttrs) + stateSinkObject.Save(2, &dir.InodeNoStatFS) + stateSinkObject.Save(3, &dir.InodeNotSymlink) + stateSinkObject.Save(4, &dir.OrderedChildren) + stateSinkObject.Save(5, &dir.syntheticDirectoryRefs) + stateSinkObject.Save(6, &dir.locks) +} + +func (dir *syntheticDirectory) afterLoad() {} + +// +checklocksignore +func (dir *syntheticDirectory) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &dir.InodeAlwaysValid) + stateSourceObject.Load(1, &dir.InodeAttrs) + stateSourceObject.Load(2, &dir.InodeNoStatFS) + stateSourceObject.Load(3, &dir.InodeNotSymlink) + stateSourceObject.Load(4, &dir.OrderedChildren) + stateSourceObject.Load(5, &dir.syntheticDirectoryRefs) + stateSourceObject.Load(6, &dir.locks) +} + +func (r *syntheticDirectoryRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.syntheticDirectoryRefs" +} + +func (r *syntheticDirectoryRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *syntheticDirectoryRefs) beforeSave() {} + +// +checklocksignore +func (r *syntheticDirectoryRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *syntheticDirectoryRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func init() { + state.Register((*dentryList)(nil)) + state.Register((*dentryEntry)(nil)) + state.Register((*DynamicBytesFile)(nil)) + state.Register((*DynamicBytesFD)(nil)) + state.Register((*SeekEndConfig)(nil)) + state.Register((*GenericDirectoryFDOptions)(nil)) + state.Register((*GenericDirectoryFD)(nil)) + state.Register((*InodeNoopRefCount)(nil)) + state.Register((*InodeDirectoryNoNewChildren)(nil)) + state.Register((*InodeNotDirectory)(nil)) + state.Register((*InodeNotSymlink)(nil)) + state.Register((*InodeAttrs)(nil)) + state.Register((*slot)(nil)) + state.Register((*OrderedChildrenOptions)(nil)) + state.Register((*OrderedChildren)(nil)) + state.Register((*InodeSymlink)(nil)) + state.Register((*StaticDirectory)(nil)) + state.Register((*InodeAlwaysValid)(nil)) + state.Register((*InodeTemporary)(nil)) + state.Register((*InodeNoStatFS)(nil)) + state.Register((*Filesystem)(nil)) + state.Register((*Dentry)(nil)) + state.Register((*inodePlatformFile)(nil)) + state.Register((*CachedMappable)(nil)) + state.Register((*slotList)(nil)) + state.Register((*slotEntry)(nil)) + state.Register((*StaticDirectoryRefs)(nil)) + state.Register((*StaticSymlink)(nil)) + state.Register((*syntheticDirectory)(nil)) + state.Register((*syntheticDirectoryRefs)(nil)) +} diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go deleted file mode 100644 index 609887943..000000000 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ /dev/null @@ -1,348 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernfs_test - -import ( - "bytes" - "fmt" - "testing" - - "github.com/google/go-cmp/cmp" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/usermem" -) - -const defaultMode linux.FileMode = 01777 -const staticFileContent = "This is sample content for a static test file." - -// RootDentryFn is a generator function for creating the root dentry of a test -// filesystem. See newTestSystem. -type RootDentryFn func(context.Context, *auth.Credentials, *filesystem) kernfs.Inode - -// newTestSystem sets up a minimal environment for running a test, including an -// instance of a test filesystem. Tests can control the contents of the -// filesystem by providing an appropriate rootFn, which should return a -// pre-populated root dentry. -func newTestSystem(t *testing.T, rootFn RootDentryFn) *testutil.System { - ctx := contexttest.Context(t) - creds := auth.CredentialsFromContext(ctx) - v := &vfs.VirtualFilesystem{} - if err := v.Init(ctx); err != nil { - t.Fatalf("VFS init: %v", err) - } - v.MustRegisterFilesystemType("testfs", &fsType{rootFn: rootFn}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mns, err := v.NewMountNamespace(ctx, creds, "", "testfs", &vfs.MountOptions{}) - if err != nil { - t.Fatalf("Failed to create testfs root mount: %v", err) - } - return testutil.NewSystem(ctx, t, v, mns) -} - -type fsType struct { - rootFn RootDentryFn -} - -type filesystem struct { - kernfs.Filesystem -} - -// MountOptions implements vfs.FilesystemImpl.MountOptions. -func (fs *filesystem) MountOptions() string { - return "" -} - -type file struct { - kernfs.DynamicBytesFile - content string -} - -func (fs *filesystem) newFile(ctx context.Context, creds *auth.Credentials, content string) kernfs.Inode { - f := &file{} - f.content = content - f.DynamicBytesFile.Init(ctx, creds, 0 /* devMajor */, 0 /* devMinor */, fs.NextIno(), f, 0777) - return f -} - -func (f *file) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%s", f.content) - return nil -} - -type attrs struct { - kernfs.InodeAttrs -} - -func (*attrs) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return linuxerr.EPERM -} - -type readonlyDir struct { - readonlyDirRefs - attrs - kernfs.InodeAlwaysValid - kernfs.InodeDirectoryNoNewChildren - kernfs.InodeNoStatFS - kernfs.InodeNotSymlink - kernfs.InodeTemporary - kernfs.OrderedChildren - - locks vfs.FileLocks -} - -func (fs *filesystem) newReadonlyDir(ctx context.Context, creds *auth.Credentials, mode linux.FileMode, contents map[string]kernfs.Inode) kernfs.Inode { - dir := &readonlyDir{} - dir.attrs.Init(ctx, creds, 0 /* devMajor */, 0 /* devMinor */, fs.NextIno(), linux.ModeDirectory|mode) - dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - dir.InitRefs() - dir.IncLinks(dir.OrderedChildren.Populate(contents)) - return dir -} - -func (d *readonlyDir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), kd, &d.OrderedChildren, &d.locks, &opts, kernfs.GenericDirectoryFDOptions{ - SeekEnd: kernfs.SeekEndStaticEntries, - }) - if err != nil { - return nil, err - } - return fd.VFSFileDescription(), nil -} - -func (d *readonlyDir) DecRef(ctx context.Context) { - d.readonlyDirRefs.DecRef(func() { d.Destroy(ctx) }) -} - -type dir struct { - dirRefs - attrs - kernfs.InodeAlwaysValid - kernfs.InodeNotSymlink - kernfs.InodeNoStatFS - kernfs.InodeTemporary - kernfs.OrderedChildren - - locks vfs.FileLocks - - fs *filesystem -} - -func (fs *filesystem) newDir(ctx context.Context, creds *auth.Credentials, mode linux.FileMode, contents map[string]kernfs.Inode) kernfs.Inode { - dir := &dir{} - dir.fs = fs - dir.attrs.Init(ctx, creds, 0 /* devMajor */, 0 /* devMinor */, fs.NextIno(), linux.ModeDirectory|mode) - dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true}) - dir.InitRefs() - - dir.IncLinks(dir.OrderedChildren.Populate(contents)) - return dir -} - -func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), kd, &d.OrderedChildren, &d.locks, &opts, kernfs.GenericDirectoryFDOptions{ - SeekEnd: kernfs.SeekEndStaticEntries, - }) - if err != nil { - return nil, err - } - return fd.VFSFileDescription(), nil -} - -func (d *dir) DecRef(ctx context.Context) { - d.dirRefs.DecRef(func() { d.Destroy(ctx) }) -} - -func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) { - creds := auth.CredentialsFromContext(ctx) - dir := d.fs.newDir(ctx, creds, opts.Mode, nil) - if err := d.OrderedChildren.Insert(name, dir); err != nil { - dir.DecRef(ctx) - return nil, err - } - d.TouchCMtime(ctx) - d.IncLinks(1) - return dir, nil -} - -func (d *dir) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (kernfs.Inode, error) { - creds := auth.CredentialsFromContext(ctx) - f := d.fs.newFile(ctx, creds, "") - if err := d.OrderedChildren.Insert(name, f); err != nil { - f.DecRef(ctx) - return nil, err - } - d.TouchCMtime(ctx) - return f, nil -} - -func (*dir) NewLink(context.Context, string, kernfs.Inode) (kernfs.Inode, error) { - return nil, linuxerr.EPERM -} - -func (*dir) NewSymlink(context.Context, string, string) (kernfs.Inode, error) { - return nil, linuxerr.EPERM -} - -func (*dir) NewNode(context.Context, string, vfs.MknodOptions) (kernfs.Inode, error) { - return nil, linuxerr.EPERM -} - -func (fsType) Name() string { - return "kernfs" -} - -func (fsType) Release(ctx context.Context) {} - -func (fst fsType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opt vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - fs := &filesystem{} - fs.VFSFilesystem().Init(vfsObj, &fst, fs) - root := fst.rootFn(ctx, creds, fs) - var d kernfs.Dentry - d.Init(&fs.Filesystem, root) - return fs.VFSFilesystem(), d.VFSDentry(), nil -} - -// -------------------- Remainder of the file are test cases -------------------- - -func TestBasic(t *testing.T) { - sys := newTestSystem(t, func(ctx context.Context, creds *auth.Credentials, fs *filesystem) kernfs.Inode { - return fs.newReadonlyDir(ctx, creds, 0755, map[string]kernfs.Inode{ - "file1": fs.newFile(ctx, creds, staticFileContent), - }) - }) - defer sys.Destroy() - sys.GetDentryOrDie(sys.PathOpAtRoot("file1")).DecRef(sys.Ctx) -} - -func TestMkdirGetDentry(t *testing.T) { - sys := newTestSystem(t, func(ctx context.Context, creds *auth.Credentials, fs *filesystem) kernfs.Inode { - return fs.newReadonlyDir(ctx, creds, 0755, map[string]kernfs.Inode{ - "dir1": fs.newDir(ctx, creds, 0755, nil), - }) - }) - defer sys.Destroy() - - pop := sys.PathOpAtRoot("dir1/a new directory") - if err := sys.VFS.MkdirAt(sys.Ctx, sys.Creds, pop, &vfs.MkdirOptions{Mode: 0755}); err != nil { - t.Fatalf("MkdirAt for PathOperation %+v failed: %v", pop, err) - } - sys.GetDentryOrDie(pop).DecRef(sys.Ctx) -} - -func TestReadStaticFile(t *testing.T) { - sys := newTestSystem(t, func(ctx context.Context, creds *auth.Credentials, fs *filesystem) kernfs.Inode { - return fs.newReadonlyDir(ctx, creds, 0755, map[string]kernfs.Inode{ - "file1": fs.newFile(ctx, creds, staticFileContent), - }) - }) - defer sys.Destroy() - - pop := sys.PathOpAtRoot("file1") - fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, pop, &vfs.OpenOptions{ - Flags: linux.O_RDONLY, - }) - if err != nil { - t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err) - } - defer fd.DecRef(sys.Ctx) - - content, err := sys.ReadToEnd(fd) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - if diff := cmp.Diff(staticFileContent, content); diff != "" { - t.Fatalf("Read returned unexpected data:\n--- want\n+++ got\n%v", diff) - } -} - -func TestCreateNewFileInStaticDir(t *testing.T) { - sys := newTestSystem(t, func(ctx context.Context, creds *auth.Credentials, fs *filesystem) kernfs.Inode { - return fs.newReadonlyDir(ctx, creds, 0755, map[string]kernfs.Inode{ - "dir1": fs.newDir(ctx, creds, 0755, nil), - }) - }) - defer sys.Destroy() - - pop := sys.PathOpAtRoot("dir1/newfile") - opts := &vfs.OpenOptions{Flags: linux.O_CREAT | linux.O_EXCL, Mode: defaultMode} - fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, pop, opts) - if err != nil { - t.Fatalf("OpenAt(pop:%+v, opts:%+v) failed: %v", pop, opts, err) - } - - // Close the file. The file should persist. - fd.DecRef(sys.Ctx) - - fd, err = sys.VFS.OpenAt(sys.Ctx, sys.Creds, pop, &vfs.OpenOptions{ - Flags: linux.O_RDONLY, - }) - if err != nil { - t.Fatalf("OpenAt(pop:%+v) = %+v failed: %v", pop, fd, err) - } - fd.DecRef(sys.Ctx) -} - -func TestDirFDReadWrite(t *testing.T) { - sys := newTestSystem(t, func(ctx context.Context, creds *auth.Credentials, fs *filesystem) kernfs.Inode { - return fs.newReadonlyDir(ctx, creds, 0755, nil) - }) - defer sys.Destroy() - - pop := sys.PathOpAtRoot("/") - fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, pop, &vfs.OpenOptions{ - Flags: linux.O_RDONLY, - }) - if err != nil { - t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err) - } - defer fd.DecRef(sys.Ctx) - - // Read/Write should fail for directory FDs. - if _, err := fd.Read(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.ReadOptions{}); !linuxerr.Equals(linuxerr.EISDIR, err) { - t.Fatalf("Read for directory FD failed with unexpected error: %v", err) - } - if _, err := fd.Write(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.WriteOptions{}); !linuxerr.Equals(linuxerr.EBADF, err) { - t.Fatalf("Write for directory FD failed with unexpected error: %v", err) - } -} - -func TestDirFDIterDirents(t *testing.T) { - sys := newTestSystem(t, func(ctx context.Context, creds *auth.Credentials, fs *filesystem) kernfs.Inode { - return fs.newReadonlyDir(ctx, creds, 0755, map[string]kernfs.Inode{ - // Fill root with nodes backed by various inode implementations. - "dir1": fs.newReadonlyDir(ctx, creds, 0755, nil), - "dir2": fs.newDir(ctx, creds, 0755, map[string]kernfs.Inode{ - "dir3": fs.newDir(ctx, creds, 0755, nil), - }), - "file1": fs.newFile(ctx, creds, staticFileContent), - }) - }) - defer sys.Destroy() - - pop := sys.PathOpAtRoot("/") - sys.AssertAllDirentTypes(sys.ListDirents(pop), map[string]testutil.DirentType{ - "dir1": linux.DT_DIR, - "dir2": linux.DT_DIR, - "file1": linux.DT_REG, - }) -} diff --git a/pkg/sentry/fsimpl/kernfs/slot_list.go b/pkg/sentry/fsimpl/kernfs/slot_list.go new file mode 100644 index 000000000..181fe7c8f --- /dev/null +++ b/pkg/sentry/fsimpl/kernfs/slot_list.go @@ -0,0 +1,221 @@ +package kernfs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type slotElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (slotElementMapper) linkerFor(elem *slot) *slot { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type slotList struct { + head *slot + tail *slot +} + +// Reset resets list l to the empty state. +func (l *slotList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *slotList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *slotList) Front() *slot { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *slotList) Back() *slot { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *slotList) Len() (count int) { + for e := l.Front(); e != nil; e = (slotElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *slotList) PushFront(e *slot) { + linker := slotElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + slotElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *slotList) PushBack(e *slot) { + linker := slotElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + slotElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *slotList) PushBackList(m *slotList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + slotElementMapper{}.linkerFor(l.tail).SetNext(m.head) + slotElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *slotList) InsertAfter(b, e *slot) { + bLinker := slotElementMapper{}.linkerFor(b) + eLinker := slotElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + slotElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *slotList) InsertBefore(a, e *slot) { + aLinker := slotElementMapper{}.linkerFor(a) + eLinker := slotElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + slotElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *slotList) Remove(e *slot) { + linker := slotElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + slotElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + slotElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type slotEntry struct { + next *slot + prev *slot +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *slotEntry) Next() *slot { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *slotEntry) Prev() *slot { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *slotEntry) SetNext(elem *slot) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *slotEntry) SetPrev(elem *slot) { + e.prev = elem +} diff --git a/pkg/sentry/fsimpl/kernfs/static_directory_refs.go b/pkg/sentry/fsimpl/kernfs/static_directory_refs.go new file mode 100644 index 000000000..69534a2d2 --- /dev/null +++ b/pkg/sentry/fsimpl/kernfs/static_directory_refs.go @@ -0,0 +1,140 @@ +package kernfs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const StaticDirectoryenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var StaticDirectoryobj *StaticDirectory + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type StaticDirectoryRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *StaticDirectoryRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *StaticDirectoryRefs) RefType() string { + return fmt.Sprintf("%T", StaticDirectoryobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *StaticDirectoryRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *StaticDirectoryRefs) LogRefs() bool { + return StaticDirectoryenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *StaticDirectoryRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *StaticDirectoryRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if StaticDirectoryenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *StaticDirectoryRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if StaticDirectoryenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *StaticDirectoryRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if StaticDirectoryenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *StaticDirectoryRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go new file mode 100644 index 000000000..3c5fdf15e --- /dev/null +++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go @@ -0,0 +1,140 @@ +package kernfs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const syntheticDirectoryenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var syntheticDirectoryobj *syntheticDirectory + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type syntheticDirectoryRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *syntheticDirectoryRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *syntheticDirectoryRefs) RefType() string { + return fmt.Sprintf("%T", syntheticDirectoryobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *syntheticDirectoryRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *syntheticDirectoryRefs) LogRefs() bool { + return syntheticDirectoryenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *syntheticDirectoryRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *syntheticDirectoryRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if syntheticDirectoryenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *syntheticDirectoryRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if syntheticDirectoryenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *syntheticDirectoryRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if syntheticDirectoryenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *syntheticDirectoryRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD deleted file mode 100644 index ed730e215..000000000 --- a/pkg/sentry/fsimpl/overlay/BUILD +++ /dev/null @@ -1,49 +0,0 @@ -load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "fstree", - out = "fstree.go", - package = "overlay", - prefix = "generic", - template = "//pkg/sentry/vfs/genericfstree:generic_fstree", - types = { - "Dentry": "dentry", - }, -) - -go_library( - name = "overlay", - srcs = [ - "copy_up.go", - "directory.go", - "filesystem.go", - "fstree.go", - "overlay.go", - "regular_file.go", - "save_restore.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/hostarch", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/sentry/arch", - "//pkg/sentry/fs/lock", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fsimpl/overlay/fstree.go b/pkg/sentry/fsimpl/overlay/fstree.go new file mode 100644 index 000000000..c3eb062ed --- /dev/null +++ b/pkg/sentry/fsimpl/overlay/fstree.go @@ -0,0 +1,55 @@ +package overlay + +import ( + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// IsAncestorDentry returns true if d is an ancestor of d2; that is, d is +// either d2's parent or an ancestor of d2's parent. +func genericIsAncestorDentry(d, d2 *dentry) bool { + for d2 != nil { + if d2.parent == d { + return true + } + if d2.parent == d2 { + return false + } + d2 = d2.parent + } + return false +} + +// ParentOrSelf returns d.parent. If d.parent is nil, ParentOrSelf returns d. +func genericParentOrSelf(d *dentry) *dentry { + if d.parent != nil { + return d.parent + } + return d +} + +// PrependPath is a generic implementation of FilesystemImpl.PrependPath(). +func genericPrependPath(vfsroot vfs.VirtualDentry, mnt *vfs.Mount, d *dentry, b *fspath.Builder) error { + for { + if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() { + return vfs.PrependPathAtVFSRootError{} + } + if mnt != nil && &d.vfsd == mnt.Root() { + return nil + } + if d.parent == nil { + return vfs.PrependPathAtNonMountRootError{} + } + b.PrependComponent(d.name) + d = d.parent + } +} + +// DebugPathname returns a pathname to d relative to its filesystem root. +// DebugPathname does not correspond to any Linux function; it's used to +// generate dentry pathnames for debugging. +func genericDebugPathname(d *dentry) string { + var b fspath.Builder + _ = genericPrependPath(vfs.VirtualDentry{}, nil, d, &b) + return b.String() +} diff --git a/pkg/sentry/fsimpl/overlay/overlay_state_autogen.go b/pkg/sentry/fsimpl/overlay/overlay_state_autogen.go new file mode 100644 index 000000000..923a2e71a --- /dev/null +++ b/pkg/sentry/fsimpl/overlay/overlay_state_autogen.go @@ -0,0 +1,321 @@ +// automatically generated by stateify. + +package overlay + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (fd *directoryFD) StateTypeName() string { + return "pkg/sentry/fsimpl/overlay.directoryFD" +} + +func (fd *directoryFD) StateFields() []string { + return []string{ + "fileDescription", + "DirectoryFileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "off", + "dirents", + } +} + +func (fd *directoryFD) beforeSave() {} + +// +checklocksignore +func (fd *directoryFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.fileDescription) + stateSinkObject.Save(1, &fd.DirectoryFileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &fd.off) + stateSinkObject.Save(4, &fd.dirents) +} + +func (fd *directoryFD) afterLoad() {} + +// +checklocksignore +func (fd *directoryFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.fileDescription) + stateSourceObject.Load(1, &fd.DirectoryFileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &fd.off) + stateSourceObject.Load(4, &fd.dirents) +} + +func (fstype *FilesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/overlay.FilesystemType" +} + +func (fstype *FilesystemType) StateFields() []string { + return []string{} +} + +func (fstype *FilesystemType) beforeSave() {} + +// +checklocksignore +func (fstype *FilesystemType) StateSave(stateSinkObject state.Sink) { + fstype.beforeSave() +} + +func (fstype *FilesystemType) afterLoad() {} + +// +checklocksignore +func (fstype *FilesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (f *FilesystemOptions) StateTypeName() string { + return "pkg/sentry/fsimpl/overlay.FilesystemOptions" +} + +func (f *FilesystemOptions) StateFields() []string { + return []string{ + "UpperRoot", + "LowerRoots", + } +} + +func (f *FilesystemOptions) beforeSave() {} + +// +checklocksignore +func (f *FilesystemOptions) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.UpperRoot) + stateSinkObject.Save(1, &f.LowerRoots) +} + +func (f *FilesystemOptions) afterLoad() {} + +// +checklocksignore +func (f *FilesystemOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.UpperRoot) + stateSourceObject.Load(1, &f.LowerRoots) +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/overlay.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "vfsfs", + "opts", + "creds", + "privateDevMinors", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.vfsfs) + stateSinkObject.Save(1, &fs.opts) + stateSinkObject.Save(2, &fs.creds) + stateSinkObject.Save(3, &fs.privateDevMinors) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.vfsfs) + stateSourceObject.Load(1, &fs.opts) + stateSourceObject.Load(2, &fs.creds) + stateSourceObject.Load(3, &fs.privateDevMinors) +} + +func (l *layerDevNumber) StateTypeName() string { + return "pkg/sentry/fsimpl/overlay.layerDevNumber" +} + +func (l *layerDevNumber) StateFields() []string { + return []string{ + "major", + "minor", + } +} + +func (l *layerDevNumber) beforeSave() {} + +// +checklocksignore +func (l *layerDevNumber) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.major) + stateSinkObject.Save(1, &l.minor) +} + +func (l *layerDevNumber) afterLoad() {} + +// +checklocksignore +func (l *layerDevNumber) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.major) + stateSourceObject.Load(1, &l.minor) +} + +func (d *dentry) StateTypeName() string { + return "pkg/sentry/fsimpl/overlay.dentry" +} + +func (d *dentry) StateFields() []string { + return []string{ + "vfsd", + "refs", + "fs", + "mode", + "uid", + "gid", + "copiedUp", + "parent", + "name", + "children", + "dirents", + "upperVD", + "lowerVDs", + "inlineLowerVDs", + "devMajor", + "devMinor", + "ino", + "lowerMappings", + "wrappedMappable", + "isMappable", + "locks", + "watches", + } +} + +func (d *dentry) beforeSave() {} + +// +checklocksignore +func (d *dentry) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.vfsd) + stateSinkObject.Save(1, &d.refs) + stateSinkObject.Save(2, &d.fs) + stateSinkObject.Save(3, &d.mode) + stateSinkObject.Save(4, &d.uid) + stateSinkObject.Save(5, &d.gid) + stateSinkObject.Save(6, &d.copiedUp) + stateSinkObject.Save(7, &d.parent) + stateSinkObject.Save(8, &d.name) + stateSinkObject.Save(9, &d.children) + stateSinkObject.Save(10, &d.dirents) + stateSinkObject.Save(11, &d.upperVD) + stateSinkObject.Save(12, &d.lowerVDs) + stateSinkObject.Save(13, &d.inlineLowerVDs) + stateSinkObject.Save(14, &d.devMajor) + stateSinkObject.Save(15, &d.devMinor) + stateSinkObject.Save(16, &d.ino) + stateSinkObject.Save(17, &d.lowerMappings) + stateSinkObject.Save(18, &d.wrappedMappable) + stateSinkObject.Save(19, &d.isMappable) + stateSinkObject.Save(20, &d.locks) + stateSinkObject.Save(21, &d.watches) +} + +// +checklocksignore +func (d *dentry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.vfsd) + stateSourceObject.Load(1, &d.refs) + stateSourceObject.Load(2, &d.fs) + stateSourceObject.Load(3, &d.mode) + stateSourceObject.Load(4, &d.uid) + stateSourceObject.Load(5, &d.gid) + stateSourceObject.Load(6, &d.copiedUp) + stateSourceObject.Load(7, &d.parent) + stateSourceObject.Load(8, &d.name) + stateSourceObject.Load(9, &d.children) + stateSourceObject.Load(10, &d.dirents) + stateSourceObject.Load(11, &d.upperVD) + stateSourceObject.Load(12, &d.lowerVDs) + stateSourceObject.Load(13, &d.inlineLowerVDs) + stateSourceObject.Load(14, &d.devMajor) + stateSourceObject.Load(15, &d.devMinor) + stateSourceObject.Load(16, &d.ino) + stateSourceObject.Load(17, &d.lowerMappings) + stateSourceObject.Load(18, &d.wrappedMappable) + stateSourceObject.Load(19, &d.isMappable) + stateSourceObject.Load(20, &d.locks) + stateSourceObject.Load(21, &d.watches) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (fd *fileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/overlay.fileDescription" +} + +func (fd *fileDescription) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "LockFD", + } +} + +func (fd *fileDescription) beforeSave() {} + +// +checklocksignore +func (fd *fileDescription) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.LockFD) +} + +func (fd *fileDescription) afterLoad() {} + +// +checklocksignore +func (fd *fileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.LockFD) +} + +func (fd *regularFileFD) StateTypeName() string { + return "pkg/sentry/fsimpl/overlay.regularFileFD" +} + +func (fd *regularFileFD) StateFields() []string { + return []string{ + "fileDescription", + "copiedUp", + "cachedFD", + "cachedFlags", + "lowerWaiters", + } +} + +func (fd *regularFileFD) beforeSave() {} + +// +checklocksignore +func (fd *regularFileFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.fileDescription) + stateSinkObject.Save(1, &fd.copiedUp) + stateSinkObject.Save(2, &fd.cachedFD) + stateSinkObject.Save(3, &fd.cachedFlags) + stateSinkObject.Save(4, &fd.lowerWaiters) +} + +func (fd *regularFileFD) afterLoad() {} + +// +checklocksignore +func (fd *regularFileFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.fileDescription) + stateSourceObject.Load(1, &fd.copiedUp) + stateSourceObject.Load(2, &fd.cachedFD) + stateSourceObject.Load(3, &fd.cachedFlags) + stateSourceObject.Load(4, &fd.lowerWaiters) +} + +func init() { + state.Register((*directoryFD)(nil)) + state.Register((*FilesystemType)(nil)) + state.Register((*FilesystemOptions)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*layerDevNumber)(nil)) + state.Register((*dentry)(nil)) + state.Register((*fileDescription)(nil)) + state.Register((*regularFileFD)(nil)) +} diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD deleted file mode 100644 index a50510031..000000000 --- a/pkg/sentry/fsimpl/pipefs/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "pipefs", - srcs = ["pipefs.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/hostarch", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/time", - "//pkg/sentry/vfs", - ], -) diff --git a/pkg/sentry/fsimpl/pipefs/pipefs_state_autogen.go b/pkg/sentry/fsimpl/pipefs/pipefs_state_autogen.go new file mode 100644 index 000000000..5f9e117c3 --- /dev/null +++ b/pkg/sentry/fsimpl/pipefs/pipefs_state_autogen.go @@ -0,0 +1,111 @@ +// automatically generated by stateify. + +package pipefs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (f *filesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/pipefs.filesystemType" +} + +func (f *filesystemType) StateFields() []string { + return []string{} +} + +func (f *filesystemType) beforeSave() {} + +// +checklocksignore +func (f *filesystemType) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystemType) afterLoad() {} + +// +checklocksignore +func (f *filesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/pipefs.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "Filesystem", + "devMinor", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.Filesystem) + stateSinkObject.Save(1, &fs.devMinor) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.Filesystem) + stateSourceObject.Load(1, &fs.devMinor) +} + +func (i *inode) StateTypeName() string { + return "pkg/sentry/fsimpl/pipefs.inode" +} + +func (i *inode) StateFields() []string { + return []string{ + "InodeNotDirectory", + "InodeNotSymlink", + "InodeNoopRefCount", + "locks", + "pipe", + "ino", + "uid", + "gid", + "ctime", + } +} + +func (i *inode) beforeSave() {} + +// +checklocksignore +func (i *inode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeNotDirectory) + stateSinkObject.Save(1, &i.InodeNotSymlink) + stateSinkObject.Save(2, &i.InodeNoopRefCount) + stateSinkObject.Save(3, &i.locks) + stateSinkObject.Save(4, &i.pipe) + stateSinkObject.Save(5, &i.ino) + stateSinkObject.Save(6, &i.uid) + stateSinkObject.Save(7, &i.gid) + stateSinkObject.Save(8, &i.ctime) +} + +func (i *inode) afterLoad() {} + +// +checklocksignore +func (i *inode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeNotDirectory) + stateSourceObject.Load(1, &i.InodeNotSymlink) + stateSourceObject.Load(2, &i.InodeNoopRefCount) + stateSourceObject.Load(3, &i.locks) + stateSourceObject.Load(4, &i.pipe) + stateSourceObject.Load(5, &i.ino) + stateSourceObject.Load(6, &i.uid) + stateSourceObject.Load(7, &i.gid) + stateSourceObject.Load(8, &i.ctime) +} + +func init() { + state.Register((*filesystemType)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*inode)(nil)) +} diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD deleted file mode 100644 index 1d3d2d95f..000000000 --- a/pkg/sentry/fsimpl/proc/BUILD +++ /dev/null @@ -1,134 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "fd_dir_inode_refs", - out = "fd_dir_inode_refs.go", - package = "proc", - prefix = "fdDirInode", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "fdDirInode", - }, -) - -go_template_instance( - name = "fd_info_dir_inode_refs", - out = "fd_info_dir_inode_refs.go", - package = "proc", - prefix = "fdInfoDirInode", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "fdInfoDirInode", - }, -) - -go_template_instance( - name = "subtasks_inode_refs", - out = "subtasks_inode_refs.go", - package = "proc", - prefix = "subtasksInode", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "subtasksInode", - }, -) - -go_template_instance( - name = "task_inode_refs", - out = "task_inode_refs.go", - package = "proc", - prefix = "taskInode", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "taskInode", - }, -) - -go_template_instance( - name = "tasks_inode_refs", - out = "tasks_inode_refs.go", - package = "proc", - prefix = "tasksInode", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "tasksInode", - }, -) - -go_library( - name = "proc", - srcs = [ - "fd_dir_inode_refs.go", - "fd_info_dir_inode_refs.go", - "filesystem.go", - "subtasks.go", - "subtasks_inode_refs.go", - "task.go", - "task_fds.go", - "task_files.go", - "task_inode_refs.go", - "task_net.go", - "tasks.go", - "tasks_files.go", - "tasks_inode_refs.go", - "tasks_sys.go", - "yama.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fsbridge", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/mm", - "//pkg/sentry/socket", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/tcpip/header", - "//pkg/tcpip/network/ipv4", - "//pkg/usermem", - ], -) - -go_test( - name = "proc_test", - size = "small", - srcs = [ - "tasks_sys_test.go", - "tasks_test.go", - ], - library = ":proc", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/sentry/contexttest", - "//pkg/sentry/fsimpl/testutil", - "//pkg/sentry/fsimpl/tmpfs", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go b/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go new file mode 100644 index 000000000..61138f055 --- /dev/null +++ b/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go @@ -0,0 +1,140 @@ +package proc + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const fdDirInodeenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var fdDirInodeobj *fdDirInode + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type fdDirInodeRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *fdDirInodeRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *fdDirInodeRefs) RefType() string { + return fmt.Sprintf("%T", fdDirInodeobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *fdDirInodeRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *fdDirInodeRefs) LogRefs() bool { + return fdDirInodeenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *fdDirInodeRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *fdDirInodeRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if fdDirInodeenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *fdDirInodeRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if fdDirInodeenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *fdDirInodeRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if fdDirInodeenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *fdDirInodeRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go b/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go new file mode 100644 index 000000000..53fb0910a --- /dev/null +++ b/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go @@ -0,0 +1,140 @@ +package proc + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const fdInfoDirInodeenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var fdInfoDirInodeobj *fdInfoDirInode + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type fdInfoDirInodeRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *fdInfoDirInodeRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *fdInfoDirInodeRefs) RefType() string { + return fmt.Sprintf("%T", fdInfoDirInodeobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *fdInfoDirInodeRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *fdInfoDirInodeRefs) LogRefs() bool { + return fdInfoDirInodeenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *fdInfoDirInodeRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *fdInfoDirInodeRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if fdInfoDirInodeenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *fdInfoDirInodeRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if fdInfoDirInodeenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *fdInfoDirInodeRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if fdInfoDirInodeenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *fdInfoDirInodeRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/proc/proc_state_autogen.go b/pkg/sentry/fsimpl/proc/proc_state_autogen.go new file mode 100644 index 000000000..f87739c23 --- /dev/null +++ b/pkg/sentry/fsimpl/proc/proc_state_autogen.go @@ -0,0 +1,2366 @@ +// automatically generated by stateify. + +package proc + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *fdDirInodeRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.fdDirInodeRefs" +} + +func (r *fdDirInodeRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *fdDirInodeRefs) beforeSave() {} + +// +checklocksignore +func (r *fdDirInodeRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *fdDirInodeRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (r *fdInfoDirInodeRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.fdInfoDirInodeRefs" +} + +func (r *fdInfoDirInodeRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *fdInfoDirInodeRefs) beforeSave() {} + +// +checklocksignore +func (r *fdInfoDirInodeRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *fdInfoDirInodeRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (ft *FilesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.FilesystemType" +} + +func (ft *FilesystemType) StateFields() []string { + return []string{} +} + +func (ft *FilesystemType) beforeSave() {} + +// +checklocksignore +func (ft *FilesystemType) StateSave(stateSinkObject state.Sink) { + ft.beforeSave() +} + +func (ft *FilesystemType) afterLoad() {} + +// +checklocksignore +func (ft *FilesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "Filesystem", + "devMinor", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.Filesystem) + stateSinkObject.Save(1, &fs.devMinor) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.Filesystem) + stateSourceObject.Load(1, &fs.devMinor) +} + +func (s *staticFile) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.staticFile" +} + +func (s *staticFile) StateFields() []string { + return []string{ + "DynamicBytesFile", + "StaticData", + } +} + +func (s *staticFile) beforeSave() {} + +// +checklocksignore +func (s *staticFile) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.DynamicBytesFile) + stateSinkObject.Save(1, &s.StaticData) +} + +func (s *staticFile) afterLoad() {} + +// +checklocksignore +func (s *staticFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.DynamicBytesFile) + stateSourceObject.Load(1, &s.StaticData) +} + +func (i *InternalData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.InternalData" +} + +func (i *InternalData) StateFields() []string { + return []string{ + "Cgroups", + } +} + +func (i *InternalData) beforeSave() {} + +// +checklocksignore +func (i *InternalData) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.Cgroups) +} + +func (i *InternalData) afterLoad() {} + +// +checklocksignore +func (i *InternalData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.Cgroups) +} + +func (i *implStatFS) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.implStatFS" +} + +func (i *implStatFS) StateFields() []string { + return []string{} +} + +func (i *implStatFS) beforeSave() {} + +// +checklocksignore +func (i *implStatFS) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *implStatFS) afterLoad() {} + +// +checklocksignore +func (i *implStatFS) StateLoad(stateSourceObject state.Source) { +} + +func (i *subtasksInode) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.subtasksInode" +} + +func (i *subtasksInode) StateFields() []string { + return []string{ + "implStatFS", + "InodeAlwaysValid", + "InodeAttrs", + "InodeDirectoryNoNewChildren", + "InodeNotSymlink", + "InodeTemporary", + "OrderedChildren", + "subtasksInodeRefs", + "locks", + "fs", + "task", + "pidns", + "cgroupControllers", + } +} + +func (i *subtasksInode) beforeSave() {} + +// +checklocksignore +func (i *subtasksInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.implStatFS) + stateSinkObject.Save(1, &i.InodeAlwaysValid) + stateSinkObject.Save(2, &i.InodeAttrs) + stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren) + stateSinkObject.Save(4, &i.InodeNotSymlink) + stateSinkObject.Save(5, &i.InodeTemporary) + stateSinkObject.Save(6, &i.OrderedChildren) + stateSinkObject.Save(7, &i.subtasksInodeRefs) + stateSinkObject.Save(8, &i.locks) + stateSinkObject.Save(9, &i.fs) + stateSinkObject.Save(10, &i.task) + stateSinkObject.Save(11, &i.pidns) + stateSinkObject.Save(12, &i.cgroupControllers) +} + +func (i *subtasksInode) afterLoad() {} + +// +checklocksignore +func (i *subtasksInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.implStatFS) + stateSourceObject.Load(1, &i.InodeAlwaysValid) + stateSourceObject.Load(2, &i.InodeAttrs) + stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren) + stateSourceObject.Load(4, &i.InodeNotSymlink) + stateSourceObject.Load(5, &i.InodeTemporary) + stateSourceObject.Load(6, &i.OrderedChildren) + stateSourceObject.Load(7, &i.subtasksInodeRefs) + stateSourceObject.Load(8, &i.locks) + stateSourceObject.Load(9, &i.fs) + stateSourceObject.Load(10, &i.task) + stateSourceObject.Load(11, &i.pidns) + stateSourceObject.Load(12, &i.cgroupControllers) +} + +func (fd *subtasksFD) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.subtasksFD" +} + +func (fd *subtasksFD) StateFields() []string { + return []string{ + "GenericDirectoryFD", + "task", + } +} + +func (fd *subtasksFD) beforeSave() {} + +// +checklocksignore +func (fd *subtasksFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.GenericDirectoryFD) + stateSinkObject.Save(1, &fd.task) +} + +func (fd *subtasksFD) afterLoad() {} + +// +checklocksignore +func (fd *subtasksFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.GenericDirectoryFD) + stateSourceObject.Load(1, &fd.task) +} + +func (r *subtasksInodeRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.subtasksInodeRefs" +} + +func (r *subtasksInodeRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *subtasksInodeRefs) beforeSave() {} + +// +checklocksignore +func (r *subtasksInodeRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *subtasksInodeRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (i *taskInode) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.taskInode" +} + +func (i *taskInode) StateFields() []string { + return []string{ + "implStatFS", + "InodeAttrs", + "InodeDirectoryNoNewChildren", + "InodeNotSymlink", + "InodeTemporary", + "OrderedChildren", + "taskInodeRefs", + "locks", + "task", + } +} + +func (i *taskInode) beforeSave() {} + +// +checklocksignore +func (i *taskInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.implStatFS) + stateSinkObject.Save(1, &i.InodeAttrs) + stateSinkObject.Save(2, &i.InodeDirectoryNoNewChildren) + stateSinkObject.Save(3, &i.InodeNotSymlink) + stateSinkObject.Save(4, &i.InodeTemporary) + stateSinkObject.Save(5, &i.OrderedChildren) + stateSinkObject.Save(6, &i.taskInodeRefs) + stateSinkObject.Save(7, &i.locks) + stateSinkObject.Save(8, &i.task) +} + +func (i *taskInode) afterLoad() {} + +// +checklocksignore +func (i *taskInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.implStatFS) + stateSourceObject.Load(1, &i.InodeAttrs) + stateSourceObject.Load(2, &i.InodeDirectoryNoNewChildren) + stateSourceObject.Load(3, &i.InodeNotSymlink) + stateSourceObject.Load(4, &i.InodeTemporary) + stateSourceObject.Load(5, &i.OrderedChildren) + stateSourceObject.Load(6, &i.taskInodeRefs) + stateSourceObject.Load(7, &i.locks) + stateSourceObject.Load(8, &i.task) +} + +func (i *taskOwnedInode) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.taskOwnedInode" +} + +func (i *taskOwnedInode) StateFields() []string { + return []string{ + "Inode", + "owner", + } +} + +func (i *taskOwnedInode) beforeSave() {} + +// +checklocksignore +func (i *taskOwnedInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.Inode) + stateSinkObject.Save(1, &i.owner) +} + +func (i *taskOwnedInode) afterLoad() {} + +// +checklocksignore +func (i *taskOwnedInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.Inode) + stateSourceObject.Load(1, &i.owner) +} + +func (i *fdDir) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.fdDir" +} + +func (i *fdDir) StateFields() []string { + return []string{ + "locks", + "fs", + "task", + "produceSymlink", + } +} + +func (i *fdDir) beforeSave() {} + +// +checklocksignore +func (i *fdDir) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.locks) + stateSinkObject.Save(1, &i.fs) + stateSinkObject.Save(2, &i.task) + stateSinkObject.Save(3, &i.produceSymlink) +} + +func (i *fdDir) afterLoad() {} + +// +checklocksignore +func (i *fdDir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.locks) + stateSourceObject.Load(1, &i.fs) + stateSourceObject.Load(2, &i.task) + stateSourceObject.Load(3, &i.produceSymlink) +} + +func (i *fdDirInode) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.fdDirInode" +} + +func (i *fdDirInode) StateFields() []string { + return []string{ + "fdDir", + "fdDirInodeRefs", + "implStatFS", + "InodeAlwaysValid", + "InodeAttrs", + "InodeDirectoryNoNewChildren", + "InodeNotSymlink", + "InodeTemporary", + "OrderedChildren", + } +} + +func (i *fdDirInode) beforeSave() {} + +// +checklocksignore +func (i *fdDirInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.fdDir) + stateSinkObject.Save(1, &i.fdDirInodeRefs) + stateSinkObject.Save(2, &i.implStatFS) + stateSinkObject.Save(3, &i.InodeAlwaysValid) + stateSinkObject.Save(4, &i.InodeAttrs) + stateSinkObject.Save(5, &i.InodeDirectoryNoNewChildren) + stateSinkObject.Save(6, &i.InodeNotSymlink) + stateSinkObject.Save(7, &i.InodeTemporary) + stateSinkObject.Save(8, &i.OrderedChildren) +} + +func (i *fdDirInode) afterLoad() {} + +// +checklocksignore +func (i *fdDirInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.fdDir) + stateSourceObject.Load(1, &i.fdDirInodeRefs) + stateSourceObject.Load(2, &i.implStatFS) + stateSourceObject.Load(3, &i.InodeAlwaysValid) + stateSourceObject.Load(4, &i.InodeAttrs) + stateSourceObject.Load(5, &i.InodeDirectoryNoNewChildren) + stateSourceObject.Load(6, &i.InodeNotSymlink) + stateSourceObject.Load(7, &i.InodeTemporary) + stateSourceObject.Load(8, &i.OrderedChildren) +} + +func (s *fdSymlink) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.fdSymlink" +} + +func (s *fdSymlink) StateFields() []string { + return []string{ + "implStatFS", + "InodeAttrs", + "InodeNoopRefCount", + "InodeSymlink", + "fs", + "task", + "fd", + } +} + +func (s *fdSymlink) beforeSave() {} + +// +checklocksignore +func (s *fdSymlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.implStatFS) + stateSinkObject.Save(1, &s.InodeAttrs) + stateSinkObject.Save(2, &s.InodeNoopRefCount) + stateSinkObject.Save(3, &s.InodeSymlink) + stateSinkObject.Save(4, &s.fs) + stateSinkObject.Save(5, &s.task) + stateSinkObject.Save(6, &s.fd) +} + +func (s *fdSymlink) afterLoad() {} + +// +checklocksignore +func (s *fdSymlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.implStatFS) + stateSourceObject.Load(1, &s.InodeAttrs) + stateSourceObject.Load(2, &s.InodeNoopRefCount) + stateSourceObject.Load(3, &s.InodeSymlink) + stateSourceObject.Load(4, &s.fs) + stateSourceObject.Load(5, &s.task) + stateSourceObject.Load(6, &s.fd) +} + +func (i *fdInfoDirInode) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.fdInfoDirInode" +} + +func (i *fdInfoDirInode) StateFields() []string { + return []string{ + "fdDir", + "fdInfoDirInodeRefs", + "implStatFS", + "InodeAlwaysValid", + "InodeAttrs", + "InodeDirectoryNoNewChildren", + "InodeNotSymlink", + "InodeTemporary", + "OrderedChildren", + } +} + +func (i *fdInfoDirInode) beforeSave() {} + +// +checklocksignore +func (i *fdInfoDirInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.fdDir) + stateSinkObject.Save(1, &i.fdInfoDirInodeRefs) + stateSinkObject.Save(2, &i.implStatFS) + stateSinkObject.Save(3, &i.InodeAlwaysValid) + stateSinkObject.Save(4, &i.InodeAttrs) + stateSinkObject.Save(5, &i.InodeDirectoryNoNewChildren) + stateSinkObject.Save(6, &i.InodeNotSymlink) + stateSinkObject.Save(7, &i.InodeTemporary) + stateSinkObject.Save(8, &i.OrderedChildren) +} + +func (i *fdInfoDirInode) afterLoad() {} + +// +checklocksignore +func (i *fdInfoDirInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.fdDir) + stateSourceObject.Load(1, &i.fdInfoDirInodeRefs) + stateSourceObject.Load(2, &i.implStatFS) + stateSourceObject.Load(3, &i.InodeAlwaysValid) + stateSourceObject.Load(4, &i.InodeAttrs) + stateSourceObject.Load(5, &i.InodeDirectoryNoNewChildren) + stateSourceObject.Load(6, &i.InodeNotSymlink) + stateSourceObject.Load(7, &i.InodeTemporary) + stateSourceObject.Load(8, &i.OrderedChildren) +} + +func (d *fdInfoData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.fdInfoData" +} + +func (d *fdInfoData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "fs", + "task", + "fd", + } +} + +func (d *fdInfoData) beforeSave() {} + +// +checklocksignore +func (d *fdInfoData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.fs) + stateSinkObject.Save(2, &d.task) + stateSinkObject.Save(3, &d.fd) +} + +func (d *fdInfoData) afterLoad() {} + +// +checklocksignore +func (d *fdInfoData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.fs) + stateSourceObject.Load(2, &d.task) + stateSourceObject.Load(3, &d.fd) +} + +func (d *auxvData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.auxvData" +} + +func (d *auxvData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + } +} + +func (d *auxvData) beforeSave() {} + +// +checklocksignore +func (d *auxvData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.task) +} + +func (d *auxvData) afterLoad() {} + +// +checklocksignore +func (d *auxvData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.task) +} + +func (d *cmdlineData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.cmdlineData" +} + +func (d *cmdlineData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + "arg", + } +} + +func (d *cmdlineData) beforeSave() {} + +// +checklocksignore +func (d *cmdlineData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.task) + stateSinkObject.Save(2, &d.arg) +} + +func (d *cmdlineData) afterLoad() {} + +// +checklocksignore +func (d *cmdlineData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.task) + stateSourceObject.Load(2, &d.arg) +} + +func (i *commInode) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.commInode" +} + +func (i *commInode) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + } +} + +func (i *commInode) beforeSave() {} + +// +checklocksignore +func (i *commInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.DynamicBytesFile) + stateSinkObject.Save(1, &i.task) +} + +func (i *commInode) afterLoad() {} + +// +checklocksignore +func (i *commInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.DynamicBytesFile) + stateSourceObject.Load(1, &i.task) +} + +func (d *commData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.commData" +} + +func (d *commData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + } +} + +func (d *commData) beforeSave() {} + +// +checklocksignore +func (d *commData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.task) +} + +func (d *commData) afterLoad() {} + +// +checklocksignore +func (d *commData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.task) +} + +func (d *idMapData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.idMapData" +} + +func (d *idMapData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + "gids", + } +} + +func (d *idMapData) beforeSave() {} + +// +checklocksignore +func (d *idMapData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.task) + stateSinkObject.Save(2, &d.gids) +} + +func (d *idMapData) afterLoad() {} + +// +checklocksignore +func (d *idMapData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.task) + stateSourceObject.Load(2, &d.gids) +} + +func (f *memInode) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.memInode" +} + +func (f *memInode) StateFields() []string { + return []string{ + "InodeAttrs", + "InodeNoStatFS", + "InodeNoopRefCount", + "InodeNotDirectory", + "InodeNotSymlink", + "task", + "locks", + } +} + +func (f *memInode) beforeSave() {} + +// +checklocksignore +func (f *memInode) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.InodeAttrs) + stateSinkObject.Save(1, &f.InodeNoStatFS) + stateSinkObject.Save(2, &f.InodeNoopRefCount) + stateSinkObject.Save(3, &f.InodeNotDirectory) + stateSinkObject.Save(4, &f.InodeNotSymlink) + stateSinkObject.Save(5, &f.task) + stateSinkObject.Save(6, &f.locks) +} + +func (f *memInode) afterLoad() {} + +// +checklocksignore +func (f *memInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.InodeAttrs) + stateSourceObject.Load(1, &f.InodeNoStatFS) + stateSourceObject.Load(2, &f.InodeNoopRefCount) + stateSourceObject.Load(3, &f.InodeNotDirectory) + stateSourceObject.Load(4, &f.InodeNotSymlink) + stateSourceObject.Load(5, &f.task) + stateSourceObject.Load(6, &f.locks) +} + +func (fd *memFD) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.memFD" +} + +func (fd *memFD) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "LockFD", + "inode", + "offset", + } +} + +func (fd *memFD) beforeSave() {} + +// +checklocksignore +func (fd *memFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.LockFD) + stateSinkObject.Save(3, &fd.inode) + stateSinkObject.Save(4, &fd.offset) +} + +func (fd *memFD) afterLoad() {} + +// +checklocksignore +func (fd *memFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.LockFD) + stateSourceObject.Load(3, &fd.inode) + stateSourceObject.Load(4, &fd.offset) +} + +func (d *mapsData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.mapsData" +} + +func (d *mapsData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + } +} + +func (d *mapsData) beforeSave() {} + +// +checklocksignore +func (d *mapsData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.task) +} + +func (d *mapsData) afterLoad() {} + +// +checklocksignore +func (d *mapsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.task) +} + +func (d *smapsData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.smapsData" +} + +func (d *smapsData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + } +} + +func (d *smapsData) beforeSave() {} + +// +checklocksignore +func (d *smapsData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.task) +} + +func (d *smapsData) afterLoad() {} + +// +checklocksignore +func (d *smapsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.task) +} + +func (s *taskStatData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.taskStatData" +} + +func (s *taskStatData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + "tgstats", + "pidns", + } +} + +func (s *taskStatData) beforeSave() {} + +// +checklocksignore +func (s *taskStatData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.DynamicBytesFile) + stateSinkObject.Save(1, &s.task) + stateSinkObject.Save(2, &s.tgstats) + stateSinkObject.Save(3, &s.pidns) +} + +func (s *taskStatData) afterLoad() {} + +// +checklocksignore +func (s *taskStatData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.DynamicBytesFile) + stateSourceObject.Load(1, &s.task) + stateSourceObject.Load(2, &s.tgstats) + stateSourceObject.Load(3, &s.pidns) +} + +func (s *statmData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.statmData" +} + +func (s *statmData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + } +} + +func (s *statmData) beforeSave() {} + +// +checklocksignore +func (s *statmData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.DynamicBytesFile) + stateSinkObject.Save(1, &s.task) +} + +func (s *statmData) afterLoad() {} + +// +checklocksignore +func (s *statmData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.DynamicBytesFile) + stateSourceObject.Load(1, &s.task) +} + +func (s *statusData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.statusData" +} + +func (s *statusData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + "pidns", + } +} + +func (s *statusData) beforeSave() {} + +// +checklocksignore +func (s *statusData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.DynamicBytesFile) + stateSinkObject.Save(1, &s.task) + stateSinkObject.Save(2, &s.pidns) +} + +func (s *statusData) afterLoad() {} + +// +checklocksignore +func (s *statusData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.DynamicBytesFile) + stateSourceObject.Load(1, &s.task) + stateSourceObject.Load(2, &s.pidns) +} + +func (i *ioData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.ioData" +} + +func (i *ioData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "ioUsage", + } +} + +func (i *ioData) beforeSave() {} + +// +checklocksignore +func (i *ioData) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.DynamicBytesFile) + stateSinkObject.Save(1, &i.ioUsage) +} + +func (i *ioData) afterLoad() {} + +// +checklocksignore +func (i *ioData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.DynamicBytesFile) + stateSourceObject.Load(1, &i.ioUsage) +} + +func (o *oomScoreAdj) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.oomScoreAdj" +} + +func (o *oomScoreAdj) StateFields() []string { + return []string{ + "DynamicBytesFile", + "task", + } +} + +func (o *oomScoreAdj) beforeSave() {} + +// +checklocksignore +func (o *oomScoreAdj) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.DynamicBytesFile) + stateSinkObject.Save(1, &o.task) +} + +func (o *oomScoreAdj) afterLoad() {} + +// +checklocksignore +func (o *oomScoreAdj) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.DynamicBytesFile) + stateSourceObject.Load(1, &o.task) +} + +func (s *exeSymlink) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.exeSymlink" +} + +func (s *exeSymlink) StateFields() []string { + return []string{ + "implStatFS", + "InodeAttrs", + "InodeNoopRefCount", + "InodeSymlink", + "fs", + "task", + } +} + +func (s *exeSymlink) beforeSave() {} + +// +checklocksignore +func (s *exeSymlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.implStatFS) + stateSinkObject.Save(1, &s.InodeAttrs) + stateSinkObject.Save(2, &s.InodeNoopRefCount) + stateSinkObject.Save(3, &s.InodeSymlink) + stateSinkObject.Save(4, &s.fs) + stateSinkObject.Save(5, &s.task) +} + +func (s *exeSymlink) afterLoad() {} + +// +checklocksignore +func (s *exeSymlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.implStatFS) + stateSourceObject.Load(1, &s.InodeAttrs) + stateSourceObject.Load(2, &s.InodeNoopRefCount) + stateSourceObject.Load(3, &s.InodeSymlink) + stateSourceObject.Load(4, &s.fs) + stateSourceObject.Load(5, &s.task) +} + +func (s *cwdSymlink) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.cwdSymlink" +} + +func (s *cwdSymlink) StateFields() []string { + return []string{ + "implStatFS", + "InodeAttrs", + "InodeNoopRefCount", + "InodeSymlink", + "fs", + "task", + } +} + +func (s *cwdSymlink) beforeSave() {} + +// +checklocksignore +func (s *cwdSymlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.implStatFS) + stateSinkObject.Save(1, &s.InodeAttrs) + stateSinkObject.Save(2, &s.InodeNoopRefCount) + stateSinkObject.Save(3, &s.InodeSymlink) + stateSinkObject.Save(4, &s.fs) + stateSinkObject.Save(5, &s.task) +} + +func (s *cwdSymlink) afterLoad() {} + +// +checklocksignore +func (s *cwdSymlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.implStatFS) + stateSourceObject.Load(1, &s.InodeAttrs) + stateSourceObject.Load(2, &s.InodeNoopRefCount) + stateSourceObject.Load(3, &s.InodeSymlink) + stateSourceObject.Load(4, &s.fs) + stateSourceObject.Load(5, &s.task) +} + +func (i *mountInfoData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.mountInfoData" +} + +func (i *mountInfoData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "fs", + "task", + } +} + +func (i *mountInfoData) beforeSave() {} + +// +checklocksignore +func (i *mountInfoData) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.DynamicBytesFile) + stateSinkObject.Save(1, &i.fs) + stateSinkObject.Save(2, &i.task) +} + +func (i *mountInfoData) afterLoad() {} + +// +checklocksignore +func (i *mountInfoData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.DynamicBytesFile) + stateSourceObject.Load(1, &i.fs) + stateSourceObject.Load(2, &i.task) +} + +func (i *mountsData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.mountsData" +} + +func (i *mountsData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "fs", + "task", + } +} + +func (i *mountsData) beforeSave() {} + +// +checklocksignore +func (i *mountsData) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.DynamicBytesFile) + stateSinkObject.Save(1, &i.fs) + stateSinkObject.Save(2, &i.task) +} + +func (i *mountsData) afterLoad() {} + +// +checklocksignore +func (i *mountsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.DynamicBytesFile) + stateSourceObject.Load(1, &i.fs) + stateSourceObject.Load(2, &i.task) +} + +func (s *namespaceSymlink) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.namespaceSymlink" +} + +func (s *namespaceSymlink) StateFields() []string { + return []string{ + "StaticSymlink", + "task", + } +} + +func (s *namespaceSymlink) beforeSave() {} + +// +checklocksignore +func (s *namespaceSymlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.StaticSymlink) + stateSinkObject.Save(1, &s.task) +} + +func (s *namespaceSymlink) afterLoad() {} + +// +checklocksignore +func (s *namespaceSymlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.StaticSymlink) + stateSourceObject.Load(1, &s.task) +} + +func (i *namespaceInode) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.namespaceInode" +} + +func (i *namespaceInode) StateFields() []string { + return []string{ + "implStatFS", + "InodeAttrs", + "InodeNoopRefCount", + "InodeNotDirectory", + "InodeNotSymlink", + "locks", + } +} + +func (i *namespaceInode) beforeSave() {} + +// +checklocksignore +func (i *namespaceInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.implStatFS) + stateSinkObject.Save(1, &i.InodeAttrs) + stateSinkObject.Save(2, &i.InodeNoopRefCount) + stateSinkObject.Save(3, &i.InodeNotDirectory) + stateSinkObject.Save(4, &i.InodeNotSymlink) + stateSinkObject.Save(5, &i.locks) +} + +func (i *namespaceInode) afterLoad() {} + +// +checklocksignore +func (i *namespaceInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.implStatFS) + stateSourceObject.Load(1, &i.InodeAttrs) + stateSourceObject.Load(2, &i.InodeNoopRefCount) + stateSourceObject.Load(3, &i.InodeNotDirectory) + stateSourceObject.Load(4, &i.InodeNotSymlink) + stateSourceObject.Load(5, &i.locks) +} + +func (fd *namespaceFD) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.namespaceFD" +} + +func (fd *namespaceFD) StateFields() []string { + return []string{ + "FileDescriptionDefaultImpl", + "LockFD", + "vfsfd", + "inode", + } +} + +func (fd *namespaceFD) beforeSave() {} + +// +checklocksignore +func (fd *namespaceFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(1, &fd.LockFD) + stateSinkObject.Save(2, &fd.vfsfd) + stateSinkObject.Save(3, &fd.inode) +} + +func (fd *namespaceFD) afterLoad() {} + +// +checklocksignore +func (fd *namespaceFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(1, &fd.LockFD) + stateSourceObject.Load(2, &fd.vfsfd) + stateSourceObject.Load(3, &fd.inode) +} + +func (d *taskCgroupData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.taskCgroupData" +} + +func (d *taskCgroupData) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + "task", + } +} + +func (d *taskCgroupData) beforeSave() {} + +// +checklocksignore +func (d *taskCgroupData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.dynamicBytesFileSetAttr) + stateSinkObject.Save(1, &d.task) +} + +func (d *taskCgroupData) afterLoad() {} + +// +checklocksignore +func (d *taskCgroupData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.dynamicBytesFileSetAttr) + stateSourceObject.Load(1, &d.task) +} + +func (r *taskInodeRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.taskInodeRefs" +} + +func (r *taskInodeRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *taskInodeRefs) beforeSave() {} + +// +checklocksignore +func (r *taskInodeRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *taskInodeRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (n *ifinet6) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.ifinet6" +} + +func (n *ifinet6) StateFields() []string { + return []string{ + "DynamicBytesFile", + "stack", + } +} + +func (n *ifinet6) beforeSave() {} + +// +checklocksignore +func (n *ifinet6) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.DynamicBytesFile) + stateSinkObject.Save(1, &n.stack) +} + +func (n *ifinet6) afterLoad() {} + +// +checklocksignore +func (n *ifinet6) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.DynamicBytesFile) + stateSourceObject.Load(1, &n.stack) +} + +func (n *netDevData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.netDevData" +} + +func (n *netDevData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "stack", + } +} + +func (n *netDevData) beforeSave() {} + +// +checklocksignore +func (n *netDevData) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.DynamicBytesFile) + stateSinkObject.Save(1, &n.stack) +} + +func (n *netDevData) afterLoad() {} + +// +checklocksignore +func (n *netDevData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.DynamicBytesFile) + stateSourceObject.Load(1, &n.stack) +} + +func (n *netUnixData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.netUnixData" +} + +func (n *netUnixData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "kernel", + } +} + +func (n *netUnixData) beforeSave() {} + +// +checklocksignore +func (n *netUnixData) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.DynamicBytesFile) + stateSinkObject.Save(1, &n.kernel) +} + +func (n *netUnixData) afterLoad() {} + +// +checklocksignore +func (n *netUnixData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.DynamicBytesFile) + stateSourceObject.Load(1, &n.kernel) +} + +func (d *netTCPData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.netTCPData" +} + +func (d *netTCPData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "kernel", + } +} + +func (d *netTCPData) beforeSave() {} + +// +checklocksignore +func (d *netTCPData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.kernel) +} + +func (d *netTCPData) afterLoad() {} + +// +checklocksignore +func (d *netTCPData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.kernel) +} + +func (d *netTCP6Data) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.netTCP6Data" +} + +func (d *netTCP6Data) StateFields() []string { + return []string{ + "DynamicBytesFile", + "kernel", + } +} + +func (d *netTCP6Data) beforeSave() {} + +// +checklocksignore +func (d *netTCP6Data) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.kernel) +} + +func (d *netTCP6Data) afterLoad() {} + +// +checklocksignore +func (d *netTCP6Data) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.kernel) +} + +func (d *netUDPData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.netUDPData" +} + +func (d *netUDPData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "kernel", + } +} + +func (d *netUDPData) beforeSave() {} + +// +checklocksignore +func (d *netUDPData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.kernel) +} + +func (d *netUDPData) afterLoad() {} + +// +checklocksignore +func (d *netUDPData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.kernel) +} + +func (d *netSnmpData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.netSnmpData" +} + +func (d *netSnmpData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "stack", + } +} + +func (d *netSnmpData) beforeSave() {} + +// +checklocksignore +func (d *netSnmpData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.stack) +} + +func (d *netSnmpData) afterLoad() {} + +// +checklocksignore +func (d *netSnmpData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.stack) +} + +func (s *snmpLine) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.snmpLine" +} + +func (s *snmpLine) StateFields() []string { + return []string{ + "prefix", + "header", + } +} + +func (s *snmpLine) beforeSave() {} + +// +checklocksignore +func (s *snmpLine) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.prefix) + stateSinkObject.Save(1, &s.header) +} + +func (s *snmpLine) afterLoad() {} + +// +checklocksignore +func (s *snmpLine) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.prefix) + stateSourceObject.Load(1, &s.header) +} + +func (d *netRouteData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.netRouteData" +} + +func (d *netRouteData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "stack", + } +} + +func (d *netRouteData) beforeSave() {} + +// +checklocksignore +func (d *netRouteData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.stack) +} + +func (d *netRouteData) afterLoad() {} + +// +checklocksignore +func (d *netRouteData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.stack) +} + +func (d *netStatData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.netStatData" +} + +func (d *netStatData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "stack", + } +} + +func (d *netStatData) beforeSave() {} + +// +checklocksignore +func (d *netStatData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.stack) +} + +func (d *netStatData) afterLoad() {} + +// +checklocksignore +func (d *netStatData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.stack) +} + +func (i *tasksInode) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.tasksInode" +} + +func (i *tasksInode) StateFields() []string { + return []string{ + "implStatFS", + "InodeAlwaysValid", + "InodeAttrs", + "InodeDirectoryNoNewChildren", + "InodeNotSymlink", + "InodeTemporary", + "OrderedChildren", + "tasksInodeRefs", + "locks", + "fs", + "pidns", + "fakeCgroupControllers", + } +} + +func (i *tasksInode) beforeSave() {} + +// +checklocksignore +func (i *tasksInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.implStatFS) + stateSinkObject.Save(1, &i.InodeAlwaysValid) + stateSinkObject.Save(2, &i.InodeAttrs) + stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren) + stateSinkObject.Save(4, &i.InodeNotSymlink) + stateSinkObject.Save(5, &i.InodeTemporary) + stateSinkObject.Save(6, &i.OrderedChildren) + stateSinkObject.Save(7, &i.tasksInodeRefs) + stateSinkObject.Save(8, &i.locks) + stateSinkObject.Save(9, &i.fs) + stateSinkObject.Save(10, &i.pidns) + stateSinkObject.Save(11, &i.fakeCgroupControllers) +} + +func (i *tasksInode) afterLoad() {} + +// +checklocksignore +func (i *tasksInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.implStatFS) + stateSourceObject.Load(1, &i.InodeAlwaysValid) + stateSourceObject.Load(2, &i.InodeAttrs) + stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren) + stateSourceObject.Load(4, &i.InodeNotSymlink) + stateSourceObject.Load(5, &i.InodeTemporary) + stateSourceObject.Load(6, &i.OrderedChildren) + stateSourceObject.Load(7, &i.tasksInodeRefs) + stateSourceObject.Load(8, &i.locks) + stateSourceObject.Load(9, &i.fs) + stateSourceObject.Load(10, &i.pidns) + stateSourceObject.Load(11, &i.fakeCgroupControllers) +} + +func (s *staticFileSetStat) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.staticFileSetStat" +} + +func (s *staticFileSetStat) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + "StaticData", + } +} + +func (s *staticFileSetStat) beforeSave() {} + +// +checklocksignore +func (s *staticFileSetStat) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.dynamicBytesFileSetAttr) + stateSinkObject.Save(1, &s.StaticData) +} + +func (s *staticFileSetStat) afterLoad() {} + +// +checklocksignore +func (s *staticFileSetStat) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.dynamicBytesFileSetAttr) + stateSourceObject.Load(1, &s.StaticData) +} + +func (s *selfSymlink) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.selfSymlink" +} + +func (s *selfSymlink) StateFields() []string { + return []string{ + "implStatFS", + "InodeAttrs", + "InodeNoopRefCount", + "InodeSymlink", + "pidns", + } +} + +func (s *selfSymlink) beforeSave() {} + +// +checklocksignore +func (s *selfSymlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.implStatFS) + stateSinkObject.Save(1, &s.InodeAttrs) + stateSinkObject.Save(2, &s.InodeNoopRefCount) + stateSinkObject.Save(3, &s.InodeSymlink) + stateSinkObject.Save(4, &s.pidns) +} + +func (s *selfSymlink) afterLoad() {} + +// +checklocksignore +func (s *selfSymlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.implStatFS) + stateSourceObject.Load(1, &s.InodeAttrs) + stateSourceObject.Load(2, &s.InodeNoopRefCount) + stateSourceObject.Load(3, &s.InodeSymlink) + stateSourceObject.Load(4, &s.pidns) +} + +func (s *threadSelfSymlink) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.threadSelfSymlink" +} + +func (s *threadSelfSymlink) StateFields() []string { + return []string{ + "implStatFS", + "InodeAttrs", + "InodeNoopRefCount", + "InodeSymlink", + "pidns", + } +} + +func (s *threadSelfSymlink) beforeSave() {} + +// +checklocksignore +func (s *threadSelfSymlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.implStatFS) + stateSinkObject.Save(1, &s.InodeAttrs) + stateSinkObject.Save(2, &s.InodeNoopRefCount) + stateSinkObject.Save(3, &s.InodeSymlink) + stateSinkObject.Save(4, &s.pidns) +} + +func (s *threadSelfSymlink) afterLoad() {} + +// +checklocksignore +func (s *threadSelfSymlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.implStatFS) + stateSourceObject.Load(1, &s.InodeAttrs) + stateSourceObject.Load(2, &s.InodeNoopRefCount) + stateSourceObject.Load(3, &s.InodeSymlink) + stateSourceObject.Load(4, &s.pidns) +} + +func (d *dynamicBytesFileSetAttr) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.dynamicBytesFileSetAttr" +} + +func (d *dynamicBytesFileSetAttr) StateFields() []string { + return []string{ + "DynamicBytesFile", + } +} + +func (d *dynamicBytesFileSetAttr) beforeSave() {} + +// +checklocksignore +func (d *dynamicBytesFileSetAttr) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) +} + +func (d *dynamicBytesFileSetAttr) afterLoad() {} + +// +checklocksignore +func (d *dynamicBytesFileSetAttr) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) +} + +func (c *cpuStats) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.cpuStats" +} + +func (c *cpuStats) StateFields() []string { + return []string{ + "user", + "nice", + "system", + "idle", + "ioWait", + "irq", + "softirq", + "steal", + "guest", + "guestNice", + } +} + +func (c *cpuStats) beforeSave() {} + +// +checklocksignore +func (c *cpuStats) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.user) + stateSinkObject.Save(1, &c.nice) + stateSinkObject.Save(2, &c.system) + stateSinkObject.Save(3, &c.idle) + stateSinkObject.Save(4, &c.ioWait) + stateSinkObject.Save(5, &c.irq) + stateSinkObject.Save(6, &c.softirq) + stateSinkObject.Save(7, &c.steal) + stateSinkObject.Save(8, &c.guest) + stateSinkObject.Save(9, &c.guestNice) +} + +func (c *cpuStats) afterLoad() {} + +// +checklocksignore +func (c *cpuStats) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.user) + stateSourceObject.Load(1, &c.nice) + stateSourceObject.Load(2, &c.system) + stateSourceObject.Load(3, &c.idle) + stateSourceObject.Load(4, &c.ioWait) + stateSourceObject.Load(5, &c.irq) + stateSourceObject.Load(6, &c.softirq) + stateSourceObject.Load(7, &c.steal) + stateSourceObject.Load(8, &c.guest) + stateSourceObject.Load(9, &c.guestNice) +} + +func (s *statData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.statData" +} + +func (s *statData) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + } +} + +func (s *statData) beforeSave() {} + +// +checklocksignore +func (s *statData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.dynamicBytesFileSetAttr) +} + +func (s *statData) afterLoad() {} + +// +checklocksignore +func (s *statData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.dynamicBytesFileSetAttr) +} + +func (l *loadavgData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.loadavgData" +} + +func (l *loadavgData) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + } +} + +func (l *loadavgData) beforeSave() {} + +// +checklocksignore +func (l *loadavgData) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.dynamicBytesFileSetAttr) +} + +func (l *loadavgData) afterLoad() {} + +// +checklocksignore +func (l *loadavgData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.dynamicBytesFileSetAttr) +} + +func (m *meminfoData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.meminfoData" +} + +func (m *meminfoData) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + } +} + +func (m *meminfoData) beforeSave() {} + +// +checklocksignore +func (m *meminfoData) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.dynamicBytesFileSetAttr) +} + +func (m *meminfoData) afterLoad() {} + +// +checklocksignore +func (m *meminfoData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.dynamicBytesFileSetAttr) +} + +func (u *uptimeData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.uptimeData" +} + +func (u *uptimeData) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + } +} + +func (u *uptimeData) beforeSave() {} + +// +checklocksignore +func (u *uptimeData) StateSave(stateSinkObject state.Sink) { + u.beforeSave() + stateSinkObject.Save(0, &u.dynamicBytesFileSetAttr) +} + +func (u *uptimeData) afterLoad() {} + +// +checklocksignore +func (u *uptimeData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &u.dynamicBytesFileSetAttr) +} + +func (v *versionData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.versionData" +} + +func (v *versionData) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + } +} + +func (v *versionData) beforeSave() {} + +// +checklocksignore +func (v *versionData) StateSave(stateSinkObject state.Sink) { + v.beforeSave() + stateSinkObject.Save(0, &v.dynamicBytesFileSetAttr) +} + +func (v *versionData) afterLoad() {} + +// +checklocksignore +func (v *versionData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &v.dynamicBytesFileSetAttr) +} + +func (d *filesystemsData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.filesystemsData" +} + +func (d *filesystemsData) StateFields() []string { + return []string{ + "DynamicBytesFile", + } +} + +func (d *filesystemsData) beforeSave() {} + +// +checklocksignore +func (d *filesystemsData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) +} + +func (d *filesystemsData) afterLoad() {} + +// +checklocksignore +func (d *filesystemsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) +} + +func (c *cgroupsData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.cgroupsData" +} + +func (c *cgroupsData) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + } +} + +func (c *cgroupsData) beforeSave() {} + +// +checklocksignore +func (c *cgroupsData) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.dynamicBytesFileSetAttr) +} + +func (c *cgroupsData) afterLoad() {} + +// +checklocksignore +func (c *cgroupsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.dynamicBytesFileSetAttr) +} + +func (c *cmdLineData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.cmdLineData" +} + +func (c *cmdLineData) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + } +} + +func (c *cmdLineData) beforeSave() {} + +// +checklocksignore +func (c *cmdLineData) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.dynamicBytesFileSetAttr) +} + +func (c *cmdLineData) afterLoad() {} + +// +checklocksignore +func (c *cmdLineData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.dynamicBytesFileSetAttr) +} + +func (r *tasksInodeRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.tasksInodeRefs" +} + +func (r *tasksInodeRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *tasksInodeRefs) beforeSave() {} + +// +checklocksignore +func (r *tasksInodeRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *tasksInodeRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (t *tcpMemDir) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.tcpMemDir" +} + +func (t *tcpMemDir) StateFields() []string { + return nil +} + +func (d *mmapMinAddrData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.mmapMinAddrData" +} + +func (d *mmapMinAddrData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "k", + } +} + +func (d *mmapMinAddrData) beforeSave() {} + +// +checklocksignore +func (d *mmapMinAddrData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.k) +} + +func (d *mmapMinAddrData) afterLoad() {} + +// +checklocksignore +func (d *mmapMinAddrData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.k) +} + +func (h *hostnameData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.hostnameData" +} + +func (h *hostnameData) StateFields() []string { + return []string{ + "DynamicBytesFile", + } +} + +func (h *hostnameData) beforeSave() {} + +// +checklocksignore +func (h *hostnameData) StateSave(stateSinkObject state.Sink) { + h.beforeSave() + stateSinkObject.Save(0, &h.DynamicBytesFile) +} + +func (h *hostnameData) afterLoad() {} + +// +checklocksignore +func (h *hostnameData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &h.DynamicBytesFile) +} + +func (d *tcpSackData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.tcpSackData" +} + +func (d *tcpSackData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "stack", + "enabled", + } +} + +func (d *tcpSackData) beforeSave() {} + +// +checklocksignore +func (d *tcpSackData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.stack) + stateSinkObject.Save(2, &d.enabled) +} + +func (d *tcpSackData) afterLoad() {} + +// +checklocksignore +func (d *tcpSackData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.LoadWait(1, &d.stack) + stateSourceObject.Load(2, &d.enabled) +} + +func (d *tcpRecoveryData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.tcpRecoveryData" +} + +func (d *tcpRecoveryData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "stack", + } +} + +func (d *tcpRecoveryData) beforeSave() {} + +// +checklocksignore +func (d *tcpRecoveryData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.stack) +} + +func (d *tcpRecoveryData) afterLoad() {} + +// +checklocksignore +func (d *tcpRecoveryData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.LoadWait(1, &d.stack) +} + +func (d *tcpMemData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.tcpMemData" +} + +func (d *tcpMemData) StateFields() []string { + return []string{ + "DynamicBytesFile", + "dir", + "stack", + } +} + +func (d *tcpMemData) beforeSave() {} + +// +checklocksignore +func (d *tcpMemData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.DynamicBytesFile) + stateSinkObject.Save(1, &d.dir) + stateSinkObject.Save(2, &d.stack) +} + +func (d *tcpMemData) afterLoad() {} + +// +checklocksignore +func (d *tcpMemData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.DynamicBytesFile) + stateSourceObject.Load(1, &d.dir) + stateSourceObject.LoadWait(2, &d.stack) +} + +func (ipf *ipForwarding) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.ipForwarding" +} + +func (ipf *ipForwarding) StateFields() []string { + return []string{ + "DynamicBytesFile", + "stack", + "enabled", + } +} + +func (ipf *ipForwarding) beforeSave() {} + +// +checklocksignore +func (ipf *ipForwarding) StateSave(stateSinkObject state.Sink) { + ipf.beforeSave() + stateSinkObject.Save(0, &ipf.DynamicBytesFile) + stateSinkObject.Save(1, &ipf.stack) + stateSinkObject.Save(2, &ipf.enabled) +} + +func (ipf *ipForwarding) afterLoad() {} + +// +checklocksignore +func (ipf *ipForwarding) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ipf.DynamicBytesFile) + stateSourceObject.LoadWait(1, &ipf.stack) + stateSourceObject.Load(2, &ipf.enabled) +} + +func (pr *portRange) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.portRange" +} + +func (pr *portRange) StateFields() []string { + return []string{ + "DynamicBytesFile", + "stack", + "start", + "end", + } +} + +func (pr *portRange) beforeSave() {} + +// +checklocksignore +func (pr *portRange) StateSave(stateSinkObject state.Sink) { + pr.beforeSave() + stateSinkObject.Save(0, &pr.DynamicBytesFile) + stateSinkObject.Save(1, &pr.stack) + stateSinkObject.Save(2, &pr.start) + stateSinkObject.Save(3, &pr.end) +} + +func (pr *portRange) afterLoad() {} + +// +checklocksignore +func (pr *portRange) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &pr.DynamicBytesFile) + stateSourceObject.LoadWait(1, &pr.stack) + stateSourceObject.Load(2, &pr.start) + stateSourceObject.Load(3, &pr.end) +} + +func (s *yamaPtraceScope) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.yamaPtraceScope" +} + +func (s *yamaPtraceScope) StateFields() []string { + return []string{ + "DynamicBytesFile", + "level", + } +} + +func (s *yamaPtraceScope) beforeSave() {} + +// +checklocksignore +func (s *yamaPtraceScope) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.DynamicBytesFile) + stateSinkObject.Save(1, &s.level) +} + +func (s *yamaPtraceScope) afterLoad() {} + +// +checklocksignore +func (s *yamaPtraceScope) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.DynamicBytesFile) + stateSourceObject.Load(1, &s.level) +} + +func init() { + state.Register((*fdDirInodeRefs)(nil)) + state.Register((*fdInfoDirInodeRefs)(nil)) + state.Register((*FilesystemType)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*staticFile)(nil)) + state.Register((*InternalData)(nil)) + state.Register((*implStatFS)(nil)) + state.Register((*subtasksInode)(nil)) + state.Register((*subtasksFD)(nil)) + state.Register((*subtasksInodeRefs)(nil)) + state.Register((*taskInode)(nil)) + state.Register((*taskOwnedInode)(nil)) + state.Register((*fdDir)(nil)) + state.Register((*fdDirInode)(nil)) + state.Register((*fdSymlink)(nil)) + state.Register((*fdInfoDirInode)(nil)) + state.Register((*fdInfoData)(nil)) + state.Register((*auxvData)(nil)) + state.Register((*cmdlineData)(nil)) + state.Register((*commInode)(nil)) + state.Register((*commData)(nil)) + state.Register((*idMapData)(nil)) + state.Register((*memInode)(nil)) + state.Register((*memFD)(nil)) + state.Register((*mapsData)(nil)) + state.Register((*smapsData)(nil)) + state.Register((*taskStatData)(nil)) + state.Register((*statmData)(nil)) + state.Register((*statusData)(nil)) + state.Register((*ioData)(nil)) + state.Register((*oomScoreAdj)(nil)) + state.Register((*exeSymlink)(nil)) + state.Register((*cwdSymlink)(nil)) + state.Register((*mountInfoData)(nil)) + state.Register((*mountsData)(nil)) + state.Register((*namespaceSymlink)(nil)) + state.Register((*namespaceInode)(nil)) + state.Register((*namespaceFD)(nil)) + state.Register((*taskCgroupData)(nil)) + state.Register((*taskInodeRefs)(nil)) + state.Register((*ifinet6)(nil)) + state.Register((*netDevData)(nil)) + state.Register((*netUnixData)(nil)) + state.Register((*netTCPData)(nil)) + state.Register((*netTCP6Data)(nil)) + state.Register((*netUDPData)(nil)) + state.Register((*netSnmpData)(nil)) + state.Register((*snmpLine)(nil)) + state.Register((*netRouteData)(nil)) + state.Register((*netStatData)(nil)) + state.Register((*tasksInode)(nil)) + state.Register((*staticFileSetStat)(nil)) + state.Register((*selfSymlink)(nil)) + state.Register((*threadSelfSymlink)(nil)) + state.Register((*dynamicBytesFileSetAttr)(nil)) + state.Register((*cpuStats)(nil)) + state.Register((*statData)(nil)) + state.Register((*loadavgData)(nil)) + state.Register((*meminfoData)(nil)) + state.Register((*uptimeData)(nil)) + state.Register((*versionData)(nil)) + state.Register((*filesystemsData)(nil)) + state.Register((*cgroupsData)(nil)) + state.Register((*cmdLineData)(nil)) + state.Register((*tasksInodeRefs)(nil)) + state.Register((*tcpMemDir)(nil)) + state.Register((*mmapMinAddrData)(nil)) + state.Register((*hostnameData)(nil)) + state.Register((*tcpSackData)(nil)) + state.Register((*tcpRecoveryData)(nil)) + state.Register((*tcpMemData)(nil)) + state.Register((*ipForwarding)(nil)) + state.Register((*portRange)(nil)) + state.Register((*yamaPtraceScope)(nil)) +} diff --git a/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go b/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go new file mode 100644 index 000000000..bd4998cbc --- /dev/null +++ b/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go @@ -0,0 +1,140 @@ +package proc + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const subtasksInodeenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var subtasksInodeobj *subtasksInode + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type subtasksInodeRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *subtasksInodeRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *subtasksInodeRefs) RefType() string { + return fmt.Sprintf("%T", subtasksInodeobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *subtasksInodeRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *subtasksInodeRefs) LogRefs() bool { + return subtasksInodeenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *subtasksInodeRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *subtasksInodeRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if subtasksInodeenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *subtasksInodeRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if subtasksInodeenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *subtasksInodeRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if subtasksInodeenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *subtasksInodeRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/proc/task_inode_refs.go b/pkg/sentry/fsimpl/proc/task_inode_refs.go new file mode 100644 index 000000000..82c63213a --- /dev/null +++ b/pkg/sentry/fsimpl/proc/task_inode_refs.go @@ -0,0 +1,140 @@ +package proc + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const taskInodeenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var taskInodeobj *taskInode + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type taskInodeRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *taskInodeRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *taskInodeRefs) RefType() string { + return fmt.Sprintf("%T", taskInodeobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *taskInodeRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *taskInodeRefs) LogRefs() bool { + return taskInodeenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *taskInodeRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *taskInodeRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if taskInodeenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *taskInodeRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if taskInodeenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *taskInodeRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if taskInodeenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *taskInodeRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/proc/tasks_inode_refs.go b/pkg/sentry/fsimpl/proc/tasks_inode_refs.go new file mode 100644 index 000000000..73adc5278 --- /dev/null +++ b/pkg/sentry/fsimpl/proc/tasks_inode_refs.go @@ -0,0 +1,140 @@ +package proc + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const tasksInodeenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var tasksInodeobj *tasksInode + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type tasksInodeRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *tasksInodeRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *tasksInodeRefs) RefType() string { + return fmt.Sprintf("%T", tasksInodeobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *tasksInodeRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *tasksInodeRefs) LogRefs() bool { + return tasksInodeenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *tasksInodeRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *tasksInodeRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if tasksInodeenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *tasksInodeRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if tasksInodeenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *tasksInodeRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if tasksInodeenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *tasksInodeRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/proc/tasks_sys_test.go b/pkg/sentry/fsimpl/proc/tasks_sys_test.go deleted file mode 100644 index 19b012f7d..000000000 --- a/pkg/sentry/fsimpl/proc/tasks_sys_test.go +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/usermem" -) - -func newIPv6TestStack() *inet.TestStack { - s := inet.NewTestStack() - s.SupportsIPv6Flag = true - return s -} - -func TestIfinet6NoAddresses(t *testing.T) { - n := &ifinet6{stack: newIPv6TestStack()} - var buf bytes.Buffer - n.Generate(contexttest.Context(t), &buf) - if buf.Len() > 0 { - t.Errorf("n.Generate() generated = %v, want = %v", buf.Bytes(), []byte{}) - } -} - -func TestIfinet6(t *testing.T) { - s := newIPv6TestStack() - s.InterfacesMap[1] = inet.Interface{Name: "eth0"} - s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - }, - } - s.InterfacesMap[2] = inet.Interface{Name: "eth1"} - s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"), - }, - } - want := map[string]struct{}{ - "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {}, - "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {}, - } - - n := &ifinet6{stack: s} - contents := n.contents() - if len(contents) != len(want) { - t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want)) - } - got := map[string]struct{}{} - for _, l := range contents { - got[l] = struct{}{} - } - - if !reflect.DeepEqual(got, want) { - t.Errorf("Got n.contents() = %v, want = %v", got, want) - } -} - -// TestIPForwarding tests the implementation of -// /proc/sys/net/ipv4/ip_forwarding -func TestConfigureIPForwarding(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - - var cases = []struct { - comment string - initial bool - str string - final bool - }{ - { - comment: `Forwarding is disabled; write 1 and enable forwarding`, - initial: false, - str: "1", - final: true, - }, - { - comment: `Forwarding is disabled; write 0 and disable forwarding`, - initial: false, - str: "0", - final: false, - }, - { - comment: `Forwarding is enabled; write 1 and enable forwarding`, - initial: true, - str: "1", - final: true, - }, - { - comment: `Forwarding is enabled; write 0 and disable forwarding`, - initial: true, - str: "0", - final: false, - }, - { - comment: `Forwarding is disabled; write 2404 and enable forwarding`, - initial: false, - str: "2404", - final: true, - }, - { - comment: `Forwarding is enabled; write 2404 and enable forwarding`, - initial: true, - str: "2404", - final: true, - }, - } - for _, c := range cases { - t.Run(c.comment, func(t *testing.T) { - s.IPForwarding = c.initial - - file := &ipForwarding{stack: s, enabled: c.initial} - - // Write the values. - src := usermem.BytesIOSequence([]byte(c.str)) - if n, err := file.Write(ctx, src, 0); n != int64(len(c.str)) || err != nil { - t.Errorf("file.Write(ctx, nil, %q, 0) = (%d, %v); want (%d, nil)", c.str, n, err, len(c.str)) - } - - // Read the values from the stack and check them. - if got, want := s.IPForwarding, c.final; got != want { - t.Errorf("s.IPForwarding incorrect; got: %v, want: %v", got, want) - } - }) - } -} diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go deleted file mode 100644 index 14f806c3c..000000000 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ /dev/null @@ -1,511 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "fmt" - "math" - "path" - "strconv" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/usermem" -) - -var ( - // Next offset 256 by convention. Adds 1 for the next offset. - selfLink = vfs.Dirent{Type: linux.DT_LNK, NextOff: 256 + 0 + 1} - threadSelfLink = vfs.Dirent{Type: linux.DT_LNK, NextOff: 256 + 1 + 1} - - // /proc/[pid] next offset starts at 256+2 (files above), then adds the - // PID, and adds 1 for the next offset. - proc1 = vfs.Dirent{Type: linux.DT_DIR, NextOff: 258 + 1 + 1} - proc2 = vfs.Dirent{Type: linux.DT_DIR, NextOff: 258 + 2 + 1} - proc3 = vfs.Dirent{Type: linux.DT_DIR, NextOff: 258 + 3 + 1} -) - -var ( - tasksStaticFiles = map[string]testutil.DirentType{ - "cmdline": linux.DT_REG, - "cpuinfo": linux.DT_REG, - "filesystems": linux.DT_REG, - "loadavg": linux.DT_REG, - "meminfo": linux.DT_REG, - "mounts": linux.DT_LNK, - "net": linux.DT_LNK, - "self": linux.DT_LNK, - "stat": linux.DT_REG, - "sys": linux.DT_DIR, - "thread-self": linux.DT_LNK, - "uptime": linux.DT_REG, - "version": linux.DT_REG, - } - tasksStaticFilesNextOffs = map[string]int64{ - "self": selfLink.NextOff, - "thread-self": threadSelfLink.NextOff, - } - taskStaticFiles = map[string]testutil.DirentType{ - "auxv": linux.DT_REG, - "cgroup": linux.DT_REG, - "cwd": linux.DT_LNK, - "cmdline": linux.DT_REG, - "comm": linux.DT_REG, - "environ": linux.DT_REG, - "exe": linux.DT_LNK, - "fd": linux.DT_DIR, - "fdinfo": linux.DT_DIR, - "gid_map": linux.DT_REG, - "io": linux.DT_REG, - "maps": linux.DT_REG, - "mem": linux.DT_REG, - "mountinfo": linux.DT_REG, - "mounts": linux.DT_REG, - "net": linux.DT_DIR, - "ns": linux.DT_DIR, - "oom_score": linux.DT_REG, - "oom_score_adj": linux.DT_REG, - "smaps": linux.DT_REG, - "stat": linux.DT_REG, - "statm": linux.DT_REG, - "status": linux.DT_REG, - "task": linux.DT_DIR, - "uid_map": linux.DT_REG, - } -) - -func setup(t *testing.T) *testutil.System { - k, err := testutil.Boot() - if err != nil { - t.Fatalf("Error creating kernel: %v", err) - } - - ctx := k.SupervisorContext() - creds := auth.CredentialsFromContext(ctx) - - k.VFS().MustRegisterFilesystemType(Name, &FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - - mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", tmpfs.Name, &vfs.MountOptions{}) - if err != nil { - t.Fatalf("NewMountNamespace(): %v", err) - } - root := mntns.Root() - root.IncRef() - defer root.DecRef(ctx) - pop := &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse("/proc"), - } - if err := k.VFS().MkdirAt(ctx, creds, pop, &vfs.MkdirOptions{Mode: 0777}); err != nil { - t.Fatalf("MkDir(/proc): %v", err) - } - - pop = &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse("/proc"), - } - mntOpts := &vfs.MountOptions{ - GetFilesystemOptions: vfs.GetFilesystemOptions{ - InternalData: &InternalData{ - Cgroups: map[string]string{ - "cpuset": "/foo/cpuset", - "memory": "/foo/memory", - }, - }, - }, - } - if _, err := k.VFS().MountAt(ctx, creds, "", pop, Name, mntOpts); err != nil { - t.Fatalf("MountAt(/proc): %v", err) - } - return testutil.NewSystem(ctx, t, k.VFS(), mntns) -} - -func TestTasksEmpty(t *testing.T) { - s := setup(t) - defer s.Destroy() - - collector := s.ListDirents(s.PathOpAtRoot("/proc")) - s.AssertAllDirentTypes(collector, tasksStaticFiles) - s.AssertDirentOffsets(collector, tasksStaticFilesNextOffs) -} - -func TestTasks(t *testing.T) { - s := setup(t) - defer s.Destroy() - - expectedDirents := make(map[string]testutil.DirentType) - for n, d := range tasksStaticFiles { - expectedDirents[n] = d - } - - k := kernel.KernelFromContext(s.Ctx) - var tasks []*kernel.Task - for i := 0; i < 5; i++ { - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root) - if err != nil { - t.Fatalf("CreateTask(): %v", err) - } - tasks = append(tasks, task) - expectedDirents[fmt.Sprintf("%d", i+1)] = linux.DT_DIR - } - - collector := s.ListDirents(s.PathOpAtRoot("/proc")) - s.AssertAllDirentTypes(collector, expectedDirents) - s.AssertDirentOffsets(collector, tasksStaticFilesNextOffs) - - lastPid := 0 - dirents := collector.OrderedDirents() - doneSkippingNonTaskDirs := false - for _, d := range dirents { - pid, err := strconv.Atoi(d.Name) - if err != nil { - if !doneSkippingNonTaskDirs { - // We haven't gotten to the task dirs yet. - continue - } - t.Fatalf("Invalid process directory %q", d.Name) - } - doneSkippingNonTaskDirs = true - if lastPid > pid { - t.Errorf("pids not in order: %v", dirents) - } - found := false - for _, t := range tasks { - if k.TaskSet().Root.IDOfTask(t) == kernel.ThreadID(pid) { - found = true - } - } - if !found { - t.Errorf("Additional task ID %d listed: %v", pid, tasks) - } - // Next offset starts at 256+2 ('self' and 'thread-self'), then adds the - // PID, and adds 1 for the next offset. - if want := int64(256 + 2 + pid + 1); d.NextOff != want { - t.Errorf("Wrong dirent offset want: %d got: %d: %+v", want, d.NextOff, d) - } - } - if !doneSkippingNonTaskDirs { - t.Fatalf("Never found any process directories.") - } - - // Test lookup. - for _, path := range []string{"/proc/1", "/proc/2"} { - fd, err := s.VFS.OpenAt( - s.Ctx, - s.Creds, - s.PathOpAtRoot(path), - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt(%q) failed: %v", path, err) - } - defer fd.DecRef(s.Ctx) - buf := make([]byte, 1) - bufIOSeq := usermem.BytesIOSequence(buf) - if _, err := fd.Read(s.Ctx, bufIOSeq, vfs.ReadOptions{}); !linuxerr.Equals(linuxerr.EISDIR, err) { - t.Errorf("wrong error reading directory: %v", err) - } - } - - if _, err := s.VFS.OpenAt( - s.Ctx, - s.Creds, - s.PathOpAtRoot("/proc/9999"), - &vfs.OpenOptions{}, - ); !linuxerr.Equals(linuxerr.ENOENT, err) { - t.Fatalf("wrong error from vfsfs.OpenAt(/proc/9999): %v", err) - } -} - -func TestTasksOffset(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - for i := 0; i < 3; i++ { - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - if _, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root); err != nil { - t.Fatalf("CreateTask(): %v", err) - } - } - - for _, tc := range []struct { - name string - offset int64 - wants map[string]vfs.Dirent - }{ - { - name: "small offset", - offset: 100, - wants: map[string]vfs.Dirent{ - "self": selfLink, - "thread-self": threadSelfLink, - "1": proc1, - "2": proc2, - "3": proc3, - }, - }, - { - name: "offset at start", - offset: 256, - wants: map[string]vfs.Dirent{ - "self": selfLink, - "thread-self": threadSelfLink, - "1": proc1, - "2": proc2, - "3": proc3, - }, - }, - { - name: "skip /proc/self", - offset: 257, - wants: map[string]vfs.Dirent{ - "thread-self": threadSelfLink, - "1": proc1, - "2": proc2, - "3": proc3, - }, - }, - { - name: "skip symlinks", - offset: 258, - wants: map[string]vfs.Dirent{ - "1": proc1, - "2": proc2, - "3": proc3, - }, - }, - { - name: "skip first process", - offset: 260, - wants: map[string]vfs.Dirent{ - "2": proc2, - "3": proc3, - }, - }, - { - name: "last process", - offset: 261, - wants: map[string]vfs.Dirent{ - "3": proc3, - }, - }, - { - name: "after last", - offset: 262, - wants: nil, - }, - { - name: "TaskLimit+1", - offset: kernel.TasksLimit + 1, - wants: nil, - }, - { - name: "max", - offset: math.MaxInt64, - wants: nil, - }, - } { - t.Run(tc.name, func(t *testing.T) { - s := s.WithSubtest(t) - fd, err := s.VFS.OpenAt( - s.Ctx, - s.Creds, - s.PathOpAtRoot("/proc"), - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt(/) failed: %v", err) - } - defer fd.DecRef(s.Ctx) - if _, err := fd.Seek(s.Ctx, tc.offset, linux.SEEK_SET); err != nil { - t.Fatalf("Seek(%d, SEEK_SET): %v", tc.offset, err) - } - - var collector testutil.DirentCollector - if err := fd.IterDirents(s.Ctx, &collector); err != nil { - t.Fatalf("IterDirent(): %v", err) - } - - expectedTypes := make(map[string]testutil.DirentType) - expectedOffsets := make(map[string]int64) - for name, want := range tc.wants { - expectedTypes[name] = want.Type - if want.NextOff != 0 { - expectedOffsets[name] = want.NextOff - } - } - - collector.SkipDotsChecks(true) // We seek()ed past the dots. - s.AssertAllDirentTypes(&collector, expectedTypes) - s.AssertDirentOffsets(&collector, expectedOffsets) - }) - } -} - -func TestTask(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - _, err := testutil.CreateTask(s.Ctx, "name", tc, s.MntNs, s.Root, s.Root) - if err != nil { - t.Fatalf("CreateTask(): %v", err) - } - - collector := s.ListDirents(s.PathOpAtRoot("/proc/1")) - s.AssertAllDirentTypes(collector, taskStaticFiles) -} - -func TestProcSelf(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - task, err := testutil.CreateTask(s.Ctx, "name", tc, s.MntNs, s.Root, s.Root) - if err != nil { - t.Fatalf("CreateTask(): %v", err) - } - - collector := s.WithTemporaryContext(task.AsyncContext()).ListDirents(&vfs.PathOperation{ - Root: s.Root, - Start: s.Root, - Path: fspath.Parse("/proc/self/"), - FollowFinalSymlink: true, - }) - s.AssertAllDirentTypes(collector, taskStaticFiles) -} - -func iterateDir(ctx context.Context, t *testing.T, s *testutil.System, fd *vfs.FileDescription) { - t.Logf("Iterating: %s", fd.MappedName(ctx)) - - var collector testutil.DirentCollector - if err := fd.IterDirents(ctx, &collector); err != nil { - t.Fatalf("IterDirents(): %v", err) - } - if err := collector.Contains(".", linux.DT_DIR); err != nil { - t.Error(err.Error()) - } - if err := collector.Contains("..", linux.DT_DIR); err != nil { - t.Error(err.Error()) - } - - for _, d := range collector.Dirents() { - if d.Name == "." || d.Name == ".." { - continue - } - absPath := path.Join(fd.MappedName(ctx), d.Name) - if d.Type == linux.DT_LNK { - link, err := s.VFS.ReadlinkAt( - ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse(absPath)}, - ) - if err != nil { - t.Errorf("vfsfs.ReadlinkAt(%v) failed: %v", absPath, err) - } else { - t.Logf("Skipping symlink: %s => %s", absPath, link) - } - continue - } - - t.Logf("Opening: %s", absPath) - child, err := s.VFS.OpenAt( - ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse(absPath)}, - &vfs.OpenOptions{}, - ) - if err != nil { - t.Errorf("vfsfs.OpenAt(%v) failed: %v", absPath, err) - continue - } - defer child.DecRef(ctx) - stat, err := child.Stat(ctx, vfs.StatOptions{}) - if err != nil { - t.Errorf("Stat(%v) failed: %v", absPath, err) - } - if got := linux.FileMode(stat.Mode).DirentType(); got != d.Type { - t.Errorf("wrong file mode, stat: %v, dirent: %v", got, d.Type) - } - if d.Type == linux.DT_DIR { - // Found another dir, let's do it again! - iterateDir(ctx, t, s, child) - } - } -} - -// TestTree iterates all directories and stats every file. -func TestTree(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - - pop := &vfs.PathOperation{ - Root: s.Root, - Start: s.Root, - Path: fspath.Parse("test-file"), - } - opts := &vfs.OpenOptions{ - Flags: linux.O_RDONLY | linux.O_CREAT, - Mode: 0777, - } - file, err := s.VFS.OpenAt(s.Ctx, s.Creds, pop, opts) - if err != nil { - t.Fatalf("failed to create test file: %v", err) - } - defer file.DecRef(s.Ctx) - - var tasks []*kernel.Task - for i := 0; i < 5; i++ { - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root) - if err != nil { - t.Fatalf("CreateTask(): %v", err) - } - // Add file to populate /proc/[pid]/fd and fdinfo directories. - task.FDTable().NewFDVFS2(task.AsyncContext(), 0, file, kernel.FDFlags{}) - tasks = append(tasks, task) - } - - ctx := tasks[0].AsyncContext() - fd, err := s.VFS.OpenAt( - ctx, - auth.CredentialsFromContext(s.Ctx), - &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse("/proc")}, - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt(/proc) failed: %v", err) - } - iterateDir(ctx, t, s, fd) - fd.DecRef(ctx) -} diff --git a/pkg/sentry/fsimpl/signalfd/BUILD b/pkg/sentry/fsimpl/signalfd/BUILD deleted file mode 100644 index adb610213..000000000 --- a/pkg/sentry/fsimpl/signalfd/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "signalfd", - srcs = ["signalfd.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/kernel", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fsimpl/signalfd/signalfd_state_autogen.go b/pkg/sentry/fsimpl/signalfd/signalfd_state_autogen.go new file mode 100644 index 000000000..3bf27c6a6 --- /dev/null +++ b/pkg/sentry/fsimpl/signalfd/signalfd_state_autogen.go @@ -0,0 +1,51 @@ +// automatically generated by stateify. + +package signalfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (sfd *SignalFileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/signalfd.SignalFileDescription" +} + +func (sfd *SignalFileDescription) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + "target", + "mask", + } +} + +func (sfd *SignalFileDescription) beforeSave() {} + +// +checklocksignore +func (sfd *SignalFileDescription) StateSave(stateSinkObject state.Sink) { + sfd.beforeSave() + stateSinkObject.Save(0, &sfd.vfsfd) + stateSinkObject.Save(1, &sfd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &sfd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &sfd.NoLockFD) + stateSinkObject.Save(4, &sfd.target) + stateSinkObject.Save(5, &sfd.mask) +} + +func (sfd *SignalFileDescription) afterLoad() {} + +// +checklocksignore +func (sfd *SignalFileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sfd.vfsfd) + stateSourceObject.Load(1, &sfd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &sfd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &sfd.NoLockFD) + stateSourceObject.Load(4, &sfd.target) + stateSourceObject.Load(5, &sfd.mask) +} + +func init() { + state.Register((*SignalFileDescription)(nil)) +} diff --git a/pkg/sentry/fsimpl/sockfs/BUILD b/pkg/sentry/fsimpl/sockfs/BUILD deleted file mode 100644 index 9defca936..000000000 --- a/pkg/sentry/fsimpl/sockfs/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "sockfs", - srcs = ["sockfs.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - ], -) diff --git a/pkg/sentry/fsimpl/sockfs/sockfs_state_autogen.go b/pkg/sentry/fsimpl/sockfs/sockfs_state_autogen.go new file mode 100644 index 000000000..cf6eddef2 --- /dev/null +++ b/pkg/sentry/fsimpl/sockfs/sockfs_state_autogen.go @@ -0,0 +1,96 @@ +// automatically generated by stateify. + +package sockfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (fsType *filesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/sockfs.filesystemType" +} + +func (fsType *filesystemType) StateFields() []string { + return []string{} +} + +func (fsType *filesystemType) beforeSave() {} + +// +checklocksignore +func (fsType *filesystemType) StateSave(stateSinkObject state.Sink) { + fsType.beforeSave() +} + +func (fsType *filesystemType) afterLoad() {} + +// +checklocksignore +func (fsType *filesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/sockfs.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "Filesystem", + "devMinor", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.Filesystem) + stateSinkObject.Save(1, &fs.devMinor) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.Filesystem) + stateSourceObject.Load(1, &fs.devMinor) +} + +func (i *inode) StateTypeName() string { + return "pkg/sentry/fsimpl/sockfs.inode" +} + +func (i *inode) StateFields() []string { + return []string{ + "InodeAttrs", + "InodeNoopRefCount", + "InodeNotDirectory", + "InodeNotSymlink", + } +} + +func (i *inode) beforeSave() {} + +// +checklocksignore +func (i *inode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeAttrs) + stateSinkObject.Save(1, &i.InodeNoopRefCount) + stateSinkObject.Save(2, &i.InodeNotDirectory) + stateSinkObject.Save(3, &i.InodeNotSymlink) +} + +func (i *inode) afterLoad() {} + +// +checklocksignore +func (i *inode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeAttrs) + stateSourceObject.Load(1, &i.InodeNoopRefCount) + stateSourceObject.Load(2, &i.InodeNotDirectory) + stateSourceObject.Load(3, &i.InodeNotSymlink) +} + +func init() { + state.Register((*filesystemType)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*inode)(nil)) +} diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD deleted file mode 100644 index 1af0a5cbc..000000000 --- a/pkg/sentry/fsimpl/sys/BUILD +++ /dev/null @@ -1,56 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "dir_refs", - out = "dir_refs.go", - package = "sys", - prefix = "dir", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "dir", - }, -) - -go_library( - name = "sys", - srcs = [ - "dir_refs.go", - "kcov.go", - "sys.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/coverage", - "//pkg/errors/linuxerr", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/sentry/arch", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - ], -) - -go_test( - name = "sys_test", - srcs = ["sys_test.go"], - deps = [ - ":sys", - "//pkg/abi/linux", - "//pkg/sentry/fsimpl/testutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "@com_github_google_go_cmp//cmp:go_default_library", - ], -) diff --git a/pkg/sentry/fsimpl/sys/dir_refs.go b/pkg/sentry/fsimpl/sys/dir_refs.go new file mode 100644 index 000000000..17bc43d2e --- /dev/null +++ b/pkg/sentry/fsimpl/sys/dir_refs.go @@ -0,0 +1,140 @@ +package sys + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const direnableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var dirobj *dir + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type dirRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *dirRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *dirRefs) RefType() string { + return fmt.Sprintf("%T", dirobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *dirRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *dirRefs) LogRefs() bool { + return direnableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *dirRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *dirRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if direnableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *dirRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if direnableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *dirRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if direnableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *dirRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/sys/sys_state_autogen.go b/pkg/sentry/fsimpl/sys/sys_state_autogen.go new file mode 100644 index 000000000..c5adf7db3 --- /dev/null +++ b/pkg/sentry/fsimpl/sys/sys_state_autogen.go @@ -0,0 +1,263 @@ +// automatically generated by stateify. + +package sys + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *dirRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/sys.dirRefs" +} + +func (r *dirRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *dirRefs) beforeSave() {} + +// +checklocksignore +func (r *dirRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *dirRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (i *kcovInode) StateTypeName() string { + return "pkg/sentry/fsimpl/sys.kcovInode" +} + +func (i *kcovInode) StateFields() []string { + return []string{ + "InodeAttrs", + "InodeNoopRefCount", + "InodeNotDirectory", + "InodeNotSymlink", + "implStatFS", + } +} + +func (i *kcovInode) beforeSave() {} + +// +checklocksignore +func (i *kcovInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeAttrs) + stateSinkObject.Save(1, &i.InodeNoopRefCount) + stateSinkObject.Save(2, &i.InodeNotDirectory) + stateSinkObject.Save(3, &i.InodeNotSymlink) + stateSinkObject.Save(4, &i.implStatFS) +} + +func (i *kcovInode) afterLoad() {} + +// +checklocksignore +func (i *kcovInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeAttrs) + stateSourceObject.Load(1, &i.InodeNoopRefCount) + stateSourceObject.Load(2, &i.InodeNotDirectory) + stateSourceObject.Load(3, &i.InodeNotSymlink) + stateSourceObject.Load(4, &i.implStatFS) +} + +func (fd *kcovFD) StateTypeName() string { + return "pkg/sentry/fsimpl/sys.kcovFD" +} + +func (fd *kcovFD) StateFields() []string { + return []string{ + "FileDescriptionDefaultImpl", + "NoLockFD", + "vfsfd", + "inode", + "kcov", + } +} + +func (fd *kcovFD) beforeSave() {} + +// +checklocksignore +func (fd *kcovFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(1, &fd.NoLockFD) + stateSinkObject.Save(2, &fd.vfsfd) + stateSinkObject.Save(3, &fd.inode) + stateSinkObject.Save(4, &fd.kcov) +} + +func (fd *kcovFD) afterLoad() {} + +// +checklocksignore +func (fd *kcovFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(1, &fd.NoLockFD) + stateSourceObject.Load(2, &fd.vfsfd) + stateSourceObject.Load(3, &fd.inode) + stateSourceObject.Load(4, &fd.kcov) +} + +func (fsType *FilesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/sys.FilesystemType" +} + +func (fsType *FilesystemType) StateFields() []string { + return []string{} +} + +func (fsType *FilesystemType) beforeSave() {} + +// +checklocksignore +func (fsType *FilesystemType) StateSave(stateSinkObject state.Sink) { + fsType.beforeSave() +} + +func (fsType *FilesystemType) afterLoad() {} + +// +checklocksignore +func (fsType *FilesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/sys.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "Filesystem", + "devMinor", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.Filesystem) + stateSinkObject.Save(1, &fs.devMinor) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.Filesystem) + stateSourceObject.Load(1, &fs.devMinor) +} + +func (d *dir) StateTypeName() string { + return "pkg/sentry/fsimpl/sys.dir" +} + +func (d *dir) StateFields() []string { + return []string{ + "dirRefs", + "InodeAlwaysValid", + "InodeAttrs", + "InodeNotSymlink", + "InodeDirectoryNoNewChildren", + "InodeTemporary", + "OrderedChildren", + "locks", + } +} + +func (d *dir) beforeSave() {} + +// +checklocksignore +func (d *dir) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.dirRefs) + stateSinkObject.Save(1, &d.InodeAlwaysValid) + stateSinkObject.Save(2, &d.InodeAttrs) + stateSinkObject.Save(3, &d.InodeNotSymlink) + stateSinkObject.Save(4, &d.InodeDirectoryNoNewChildren) + stateSinkObject.Save(5, &d.InodeTemporary) + stateSinkObject.Save(6, &d.OrderedChildren) + stateSinkObject.Save(7, &d.locks) +} + +func (d *dir) afterLoad() {} + +// +checklocksignore +func (d *dir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.dirRefs) + stateSourceObject.Load(1, &d.InodeAlwaysValid) + stateSourceObject.Load(2, &d.InodeAttrs) + stateSourceObject.Load(3, &d.InodeNotSymlink) + stateSourceObject.Load(4, &d.InodeDirectoryNoNewChildren) + stateSourceObject.Load(5, &d.InodeTemporary) + stateSourceObject.Load(6, &d.OrderedChildren) + stateSourceObject.Load(7, &d.locks) +} + +func (c *cpuFile) StateTypeName() string { + return "pkg/sentry/fsimpl/sys.cpuFile" +} + +func (c *cpuFile) StateFields() []string { + return []string{ + "implStatFS", + "DynamicBytesFile", + "maxCores", + } +} + +func (c *cpuFile) beforeSave() {} + +// +checklocksignore +func (c *cpuFile) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.implStatFS) + stateSinkObject.Save(1, &c.DynamicBytesFile) + stateSinkObject.Save(2, &c.maxCores) +} + +func (c *cpuFile) afterLoad() {} + +// +checklocksignore +func (c *cpuFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.implStatFS) + stateSourceObject.Load(1, &c.DynamicBytesFile) + stateSourceObject.Load(2, &c.maxCores) +} + +func (i *implStatFS) StateTypeName() string { + return "pkg/sentry/fsimpl/sys.implStatFS" +} + +func (i *implStatFS) StateFields() []string { + return []string{} +} + +func (i *implStatFS) beforeSave() {} + +// +checklocksignore +func (i *implStatFS) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *implStatFS) afterLoad() {} + +// +checklocksignore +func (i *implStatFS) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*dirRefs)(nil)) + state.Register((*kcovInode)(nil)) + state.Register((*kcovFD)(nil)) + state.Register((*FilesystemType)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*dir)(nil)) + state.Register((*cpuFile)(nil)) + state.Register((*implStatFS)(nil)) +} diff --git a/pkg/sentry/fsimpl/sys/sys_test.go b/pkg/sentry/fsimpl/sys/sys_test.go deleted file mode 100644 index 0a0d914cc..000000000 --- a/pkg/sentry/fsimpl/sys/sys_test.go +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package sys_test - -import ( - "fmt" - "testing" - - "github.com/google/go-cmp/cmp" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -func newTestSystem(t *testing.T) *testutil.System { - k, err := testutil.Boot() - if err != nil { - t.Fatalf("Failed to create test kernel: %v", err) - } - ctx := k.SupervisorContext() - creds := auth.CredentialsFromContext(ctx) - k.VFS().MustRegisterFilesystemType(sys.Name, sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - - mns, err := k.VFS().NewMountNamespace(ctx, creds, "", sys.Name, &vfs.MountOptions{}) - if err != nil { - t.Fatalf("Failed to create new mount namespace: %v", err) - } - return testutil.NewSystem(ctx, t, k.VFS(), mns) -} - -func TestReadCPUFile(t *testing.T) { - s := newTestSystem(t) - defer s.Destroy() - k := kernel.KernelFromContext(s.Ctx) - maxCPUCores := k.ApplicationCores() - - expected := fmt.Sprintf("0-%d\n", maxCPUCores-1) - - for _, fname := range []string{"online", "possible", "present"} { - pop := s.PathOpAtRoot(fmt.Sprintf("devices/system/cpu/%s", fname)) - fd, err := s.VFS.OpenAt(s.Ctx, s.Creds, pop, &vfs.OpenOptions{}) - if err != nil { - t.Fatalf("OpenAt(pop:%+v) = %+v failed: %v", pop, fd, err) - } - defer fd.DecRef(s.Ctx) - content, err := s.ReadToEnd(fd) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - if diff := cmp.Diff(expected, content); diff != "" { - t.Fatalf("Read returned unexpected data:\n--- want\n+++ got\n%v", diff) - } - } -} - -func TestSysRootContainsExpectedEntries(t *testing.T) { - s := newTestSystem(t) - defer s.Destroy() - pop := s.PathOpAtRoot("/") - s.AssertAllDirentTypes(s.ListDirents(pop), map[string]testutil.DirentType{ - "block": linux.DT_DIR, - "bus": linux.DT_DIR, - "class": linux.DT_DIR, - "dev": linux.DT_DIR, - "devices": linux.DT_DIR, - "firmware": linux.DT_DIR, - "fs": linux.DT_DIR, - "kernel": linux.DT_DIR, - "module": linux.DT_DIR, - "power": linux.DT_DIR, - }) -} diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD deleted file mode 100644 index b3f9d1010..000000000 --- a/pkg/sentry/fsimpl/testutil/BUILD +++ /dev/null @@ -1,38 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "testutil", - testonly = 1, - srcs = [ - "kernel.go", - "testutil.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/cpuid", - "//pkg/fspath", - "//pkg/hostarch", - "//pkg/memutil", - "//pkg/sentry/fsbridge", - "//pkg/sentry/fsimpl/tmpfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/sched", - "//pkg/sentry/limits", - "//pkg/sentry/loader", - "//pkg/sentry/mm", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm", - "//pkg/sentry/platform/ptrace", - "//pkg/sentry/time", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/usermem", - "@com_github_google_go_cmp//cmp:go_default_library", - ], -) diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go deleted file mode 100644 index 473b41cff..000000000 --- a/pkg/sentry/fsimpl/testutil/kernel.go +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package testutil - -import ( - "flag" - "fmt" - "os" - "runtime" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/cpuid" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/memutil" - "gvisor.dev/gvisor/pkg/sentry/fsbridge" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/sched" - "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/loader" - "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/time" - "gvisor.dev/gvisor/pkg/sentry/vfs" - - // Platforms are plugable. - _ "gvisor.dev/gvisor/pkg/sentry/platform/kvm" - _ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" -) - -var ( - platformFlag = flag.String("platform", "ptrace", "specify which platform to use") -) - -// Boot initializes a new bare bones kernel for test. -func Boot() (*kernel.Kernel, error) { - platformCtr, err := platform.Lookup(*platformFlag) - if err != nil { - return nil, fmt.Errorf("platform not found: %v", err) - } - deviceFile, err := platformCtr.OpenDevice() - if err != nil { - return nil, fmt.Errorf("creating platform: %v", err) - } - plat, err := platformCtr.New(deviceFile) - if err != nil { - return nil, fmt.Errorf("creating platform: %v", err) - } - - kernel.VFS2Enabled = true - k := &kernel.Kernel{ - Platform: plat, - } - - mf, err := createMemoryFile() - if err != nil { - return nil, err - } - k.SetMemoryFile(mf) - - // Pass k as the platform since it is savable, unlike the actual platform. - vdso, err := loader.PrepareVDSO(k) - if err != nil { - return nil, fmt.Errorf("creating vdso: %v", err) - } - - // Create timekeeper. - tk := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) - tk.SetClocks(time.NewCalibratedClocks()) - - creds := auth.NewRootCredentials(auth.NewRootUserNamespace()) - - // Initiate the Kernel object, which is required by the Context passed - // to createVFS in order to mount (among other things) procfs. - if err = k.Init(kernel.InitKernelArgs{ - ApplicationCores: uint(runtime.GOMAXPROCS(-1)), - FeatureSet: cpuid.HostFeatureSet(), - Timekeeper: tk, - RootUserNamespace: creds.UserNamespace, - Vdso: vdso, - RootUTSNamespace: kernel.NewUTSNamespace("hostname", "domain", creds.UserNamespace), - RootIPCNamespace: kernel.NewIPCNamespace(creds.UserNamespace), - RootAbstractSocketNamespace: kernel.NewAbstractSocketNamespace(), - PIDNamespace: kernel.NewRootPIDNamespace(creds.UserNamespace), - }); err != nil { - return nil, fmt.Errorf("initializing kernel: %v", err) - } - - k.VFS().MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - AllowUserList: true, - }) - - ls, err := limits.NewLinuxLimitSet() - if err != nil { - return nil, err - } - tg := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, ls) - k.TestOnlySetGlobalInit(tg) - - return k, nil -} - -// CreateTask creates a new bare bones task for tests. -func CreateTask(ctx context.Context, name string, tc *kernel.ThreadGroup, mntns *vfs.MountNamespace, root, cwd vfs.VirtualDentry) (*kernel.Task, error) { - k := kernel.KernelFromContext(ctx) - if k == nil { - return nil, fmt.Errorf("cannot find kernel from context") - } - - exe, err := newFakeExecutable(ctx, k.VFS(), auth.CredentialsFromContext(ctx), root) - if err != nil { - return nil, err - } - m := mm.NewMemoryManager(k, k, k.SleepForAddressSpaceActivation) - m.SetExecutable(ctx, fsbridge.NewVFSFile(exe)) - - config := &kernel.TaskConfig{ - Kernel: k, - ThreadGroup: tc, - TaskImage: &kernel.TaskImage{Name: name, MemoryManager: m}, - Credentials: auth.CredentialsFromContext(ctx), - NetworkNamespace: k.RootNetworkNamespace(), - AllowedCPUMask: sched.NewFullCPUSet(k.ApplicationCores()), - UTSNamespace: kernel.UTSNamespaceFromContext(ctx), - IPCNamespace: kernel.IPCNamespaceFromContext(ctx), - AbstractSocketNamespace: kernel.NewAbstractSocketNamespace(), - MountNamespaceVFS2: mntns, - FSContext: kernel.NewFSContextVFS2(root, cwd, 0022), - FDTable: k.NewFDTable(), - } - t, err := k.TaskSet().NewTask(ctx, config) - if err != nil { - config.ThreadGroup.Release(ctx) - return nil, err - } - return t, nil -} - -func newFakeExecutable(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, root vfs.VirtualDentry) (*vfs.FileDescription, error) { - const name = "executable" - pop := &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(name), - } - opts := &vfs.OpenOptions{ - Flags: linux.O_RDONLY | linux.O_CREAT, - Mode: 0777, - } - return vfsObj.OpenAt(ctx, creds, pop, opts) -} - -func createMemoryFile() (*pgalloc.MemoryFile, error) { - const memfileName = "test-memory" - memfd, err := memutil.CreateMemFD(memfileName, 0) - if err != nil { - return nil, fmt.Errorf("error creating memfd: %v", err) - } - memfile := os.NewFile(uintptr(memfd), memfileName) - mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{}) - if err != nil { - _ = memfile.Close() - return nil, fmt.Errorf("error creating pgalloc.MemoryFile: %v", err) - } - return mf, nil -} diff --git a/pkg/sentry/fsimpl/testutil/testutil.go b/pkg/sentry/fsimpl/testutil/testutil.go deleted file mode 100644 index 59e6f9c92..000000000 --- a/pkg/sentry/fsimpl/testutil/testutil.go +++ /dev/null @@ -1,288 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package testutil provides common test utilities for kernfs-based -// filesystems. -package testutil - -import ( - "fmt" - "io" - "strings" - "testing" - - "github.com/google/go-cmp/cmp" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" - - "gvisor.dev/gvisor/pkg/hostarch" -) - -// System represents the context for a single test. -// -// Test systems must be explicitly destroyed with System.Destroy. -type System struct { - t *testing.T - Ctx context.Context - Creds *auth.Credentials - VFS *vfs.VirtualFilesystem - Root vfs.VirtualDentry - MntNs *vfs.MountNamespace -} - -// NewSystem constructs a System. -// -// Precondition: Caller must hold a reference on mns, whose ownership -// is transferred to the new System. -func NewSystem(ctx context.Context, t *testing.T, v *vfs.VirtualFilesystem, mns *vfs.MountNamespace) *System { - root := mns.Root() - root.IncRef() - s := &System{ - t: t, - Ctx: ctx, - Creds: auth.CredentialsFromContext(ctx), - VFS: v, - MntNs: mns, - Root: root, - } - return s -} - -// WithSubtest creates a temporary test system with a new test harness, -// referencing all other resources from the original system. This is useful when -// a system is reused for multiple subtests, and the T needs to change for each -// case. Note that this is safe when test cases run in parallel, as all -// resources referenced by the system are immutable, or handle interior -// mutations in a thread-safe manner. -// -// The returned system must not outlive the original and should not be destroyed -// via System.Destroy. -func (s *System) WithSubtest(t *testing.T) *System { - return &System{ - t: t, - Ctx: s.Ctx, - Creds: s.Creds, - VFS: s.VFS, - MntNs: s.MntNs, - Root: s.Root, - } -} - -// WithTemporaryContext constructs a temporary test system with a new context -// ctx. The temporary system borrows all resources and references from the -// original system. The returned temporary system must not outlive the original -// system, and should not be destroyed via System.Destroy. -func (s *System) WithTemporaryContext(ctx context.Context) *System { - return &System{ - t: s.t, - Ctx: ctx, - Creds: s.Creds, - VFS: s.VFS, - MntNs: s.MntNs, - Root: s.Root, - } -} - -// Destroy release resources associated with a test system. -func (s *System) Destroy() { - s.Root.DecRef(s.Ctx) - s.MntNs.DecRef(s.Ctx) // Reference on MntNs passed to NewSystem. -} - -// ReadToEnd reads the contents of fd until EOF to a string. -func (s *System) ReadToEnd(fd *vfs.FileDescription) (string, error) { - buf := make([]byte, hostarch.PageSize) - bufIOSeq := usermem.BytesIOSequence(buf) - opts := vfs.ReadOptions{} - - var content strings.Builder - for { - n, err := fd.Read(s.Ctx, bufIOSeq, opts) - if n == 0 || err != nil { - if err == io.EOF { - err = nil - } - return content.String(), err - } - content.Write(buf[:n]) - } -} - -// PathOpAtRoot constructs a PathOperation with the given path from -// the root of the filesystem. -func (s *System) PathOpAtRoot(path string) *vfs.PathOperation { - return &vfs.PathOperation{ - Root: s.Root, - Start: s.Root, - Path: fspath.Parse(path), - } -} - -// GetDentryOrDie attempts to resolve a dentry referred to by the -// provided path operation. If unsuccessful, the test fails. -func (s *System) GetDentryOrDie(pop *vfs.PathOperation) vfs.VirtualDentry { - vd, err := s.VFS.GetDentryAt(s.Ctx, s.Creds, pop, &vfs.GetDentryOptions{}) - if err != nil { - s.t.Fatalf("GetDentryAt(pop:%+v) failed: %v", pop, err) - } - return vd -} - -// DirentType is an alias for values for linux_dirent64.d_type. -type DirentType = uint8 - -// ListDirents lists the Dirents for a directory at pop. -func (s *System) ListDirents(pop *vfs.PathOperation) *DirentCollector { - fd, err := s.VFS.OpenAt(s.Ctx, s.Creds, pop, &vfs.OpenOptions{Flags: linux.O_RDONLY}) - if err != nil { - s.t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err) - } - defer fd.DecRef(s.Ctx) - - collector := &DirentCollector{} - if err := fd.IterDirents(s.Ctx, collector); err != nil { - s.t.Fatalf("IterDirent failed: %v", err) - } - return collector -} - -// AssertAllDirentTypes verifies that the set of dirents in collector contains -// exactly the specified set of expected entries. AssertAllDirentTypes respects -// collector.skipDots, and implicitly checks for "." and ".." accordingly. -func (s *System) AssertAllDirentTypes(collector *DirentCollector, expected map[string]DirentType) { - if expected == nil { - expected = make(map[string]DirentType) - } - // Also implicitly check for "." and "..", if enabled. - if !collector.skipDots { - expected["."] = linux.DT_DIR - expected[".."] = linux.DT_DIR - } - - dentryTypes := make(map[string]DirentType) - collector.mu.Lock() - for _, dirent := range collector.dirents { - dentryTypes[dirent.Name] = dirent.Type - } - collector.mu.Unlock() - if diff := cmp.Diff(expected, dentryTypes); diff != "" { - s.t.Fatalf("IterDirent had unexpected results:\n--- want\n+++ got\n%v", diff) - } -} - -// AssertDirentOffsets verifies that collector contains at least the entries -// specified in expected, with the given NextOff field. Entries specified in -// expected but missing from collector result in failure. Extra entries in -// collector are ignored. AssertDirentOffsets respects collector.skipDots, and -// implicitly checks for "." and ".." accordingly. -func (s *System) AssertDirentOffsets(collector *DirentCollector, expected map[string]int64) { - // Also implicitly check for "." and "..", if enabled. - if !collector.skipDots { - expected["."] = 1 - expected[".."] = 2 - } - - dentryNextOffs := make(map[string]int64) - collector.mu.Lock() - for _, dirent := range collector.dirents { - // Ignore extra entries in dentries that are not in expected. - if _, ok := expected[dirent.Name]; ok { - dentryNextOffs[dirent.Name] = dirent.NextOff - } - } - collector.mu.Unlock() - if diff := cmp.Diff(expected, dentryNextOffs); diff != "" { - s.t.Fatalf("IterDirent had unexpected results:\n--- want\n+++ got\n%v", diff) - } -} - -// DirentCollector provides an implementation for vfs.IterDirentsCallback for -// testing. It simply iterates to the end of a given directory FD and collects -// all dirents emitted by the callback. -type DirentCollector struct { - mu sync.Mutex - order []*vfs.Dirent - dirents map[string]*vfs.Dirent - // When the collector is used in various Assert* functions, should "." and - // ".." be implicitly checked? - skipDots bool -} - -// SkipDotsChecks enables or disables the implicit checks on "." and ".." when -// the collector is used in various Assert* functions. Note that "." and ".." -// are still collected if passed to d.Handle, so the caller should only disable -// the checks when they aren't expected. -func (d *DirentCollector) SkipDotsChecks(value bool) { - d.skipDots = value -} - -// Handle implements vfs.IterDirentsCallback.Handle. -func (d *DirentCollector) Handle(dirent vfs.Dirent) error { - d.mu.Lock() - if d.dirents == nil { - d.dirents = make(map[string]*vfs.Dirent) - } - d.order = append(d.order, &dirent) - d.dirents[dirent.Name] = &dirent - d.mu.Unlock() - return nil -} - -// Count returns the number of dirents currently in the collector. -func (d *DirentCollector) Count() int { - d.mu.Lock() - defer d.mu.Unlock() - return len(d.dirents) -} - -// Contains checks whether the collector has a dirent with the given name and -// type. -func (d *DirentCollector) Contains(name string, typ uint8) error { - d.mu.Lock() - defer d.mu.Unlock() - dirent, ok := d.dirents[name] - if !ok { - return fmt.Errorf("no dirent named %q found", name) - } - if dirent.Type != typ { - return fmt.Errorf("dirent named %q found, but was expecting type %s, got: %+v", name, linux.DirentType.Parse(uint64(typ)), dirent) - } - return nil -} - -// Dirents returns all dirents discovered by this collector. -func (d *DirentCollector) Dirents() map[string]*vfs.Dirent { - d.mu.Lock() - dirents := make(map[string]*vfs.Dirent) - for n, d := range d.dirents { - dirents[n] = d - } - d.mu.Unlock() - return dirents -} - -// OrderedDirents returns an ordered list of dirents as discovered by this -// collector. -func (d *DirentCollector) OrderedDirents() []*vfs.Dirent { - d.mu.Lock() - dirents := make([]*vfs.Dirent, len(d.order)) - copy(dirents, d.order) - d.mu.Unlock() - return dirents -} diff --git a/pkg/sentry/fsimpl/timerfd/BUILD b/pkg/sentry/fsimpl/timerfd/BUILD deleted file mode 100644 index e6980a314..000000000 --- a/pkg/sentry/fsimpl/timerfd/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "timerfd", - srcs = ["timerfd.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sentry/kernel/time", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fsimpl/timerfd/timerfd_state_autogen.go b/pkg/sentry/fsimpl/timerfd/timerfd_state_autogen.go new file mode 100644 index 000000000..12970f25c --- /dev/null +++ b/pkg/sentry/fsimpl/timerfd/timerfd_state_autogen.go @@ -0,0 +1,54 @@ +// automatically generated by stateify. + +package timerfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (tfd *TimerFileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/timerfd.TimerFileDescription" +} + +func (tfd *TimerFileDescription) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + "events", + "timer", + "val", + } +} + +func (tfd *TimerFileDescription) beforeSave() {} + +// +checklocksignore +func (tfd *TimerFileDescription) StateSave(stateSinkObject state.Sink) { + tfd.beforeSave() + stateSinkObject.Save(0, &tfd.vfsfd) + stateSinkObject.Save(1, &tfd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &tfd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &tfd.NoLockFD) + stateSinkObject.Save(4, &tfd.events) + stateSinkObject.Save(5, &tfd.timer) + stateSinkObject.Save(6, &tfd.val) +} + +func (tfd *TimerFileDescription) afterLoad() {} + +// +checklocksignore +func (tfd *TimerFileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tfd.vfsfd) + stateSourceObject.Load(1, &tfd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &tfd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &tfd.NoLockFD) + stateSourceObject.Load(4, &tfd.events) + stateSourceObject.Load(5, &tfd.timer) + stateSourceObject.Load(6, &tfd.val) +} + +func init() { + state.Register((*TimerFileDescription)(nil)) +} diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD deleted file mode 100644 index dc8b9bfeb..000000000 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ /dev/null @@ -1,131 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "dentry_list", - out = "dentry_list.go", - package = "tmpfs", - prefix = "dentry", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*dentry", - "Linker": "*dentry", - }, -) - -go_template_instance( - name = "fstree", - out = "fstree.go", - package = "tmpfs", - prefix = "generic", - template = "//pkg/sentry/vfs/genericfstree:generic_fstree", - types = { - "Dentry": "dentry", - }, -) - -go_template_instance( - name = "inode_refs", - out = "inode_refs.go", - package = "tmpfs", - prefix = "inode", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "inode", - }, -) - -go_library( - name = "tmpfs", - srcs = [ - "dentry_list.go", - "device_file.go", - "directory.go", - "filesystem.go", - "fstree.go", - "inode_refs.go", - "named_pipe.go", - "regular_file.go", - "save_restore.go", - "socket_file.go", - "symlink.go", - "tmpfs.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/amutex", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/hostarch", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fsmetric", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/uniqueid", - "//pkg/sentry/usage", - "//pkg/sentry/vfs", - "//pkg/sentry/vfs/memxattr", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - ], -) - -go_test( - name = "benchmark_test", - size = "small", - srcs = ["benchmark_test.go"], - deps = [ - ":tmpfs", - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/refs", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - ], -) - -go_test( - name = "tmpfs_test", - size = "small", - srcs = [ - "pipe_test.go", - "regular_file_test.go", - "stat_test.go", - "tmpfs_test.go", - ], - library = ":tmpfs", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs/lock", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go deleted file mode 100644 index 2c29343c1..000000000 --- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go +++ /dev/null @@ -1,488 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package benchmark_test - -import ( - "fmt" - "runtime" - "strings" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// Differences from stat_benchmark: -// -// - Syscall interception, CopyInPath, copyOutStat, and overlayfs overheads are -// not included. -// -// - *MountStat benchmarks use a tmpfs root mount and a tmpfs submount at /tmp. -// Non-MountStat benchmarks use a tmpfs root mount and no submounts. -// stat_benchmark uses a varying root mount, a tmpfs submount at /tmp, and a -// subdirectory /tmp/<top_dir> (assuming TEST_TMPDIR == "/tmp"). Thus -// stat_benchmark at depth 1 does a comparable amount of work to *MountStat -// benchmarks at depth 2, and non-MountStat benchmarks at depth 3. -var depths = []int{1, 2, 3, 8, 64, 100} - -const ( - mountPointName = "tmp" - filename = "gvisor_test_temp_0_1557494568" -) - -// This is copied from syscalls/linux/sys_file.go, with the dependency on -// kernel.Task stripped out. -func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error { - var ( - d *fs.Dirent // The file. - rel *fs.Dirent // The relative directory for search (if required.) - err error - ) - - // Extract the working directory (maybe). - if len(path) > 0 && path[0] == '/' { - // Absolute path; rel can be nil. - } else if dirFD == linux.AT_FDCWD { - // Need to reference the working directory. - rel = wd - } else { - // Need to extract the given FD. - return linuxerr.EBADF - } - - // Lookup the node. - remainingTraversals := uint(linux.MaxSymlinkTraversals) - if resolve { - d, err = mntns.FindInode(ctx, root, rel, path, &remainingTraversals) - } else { - d, err = mntns.FindLink(ctx, root, rel, path, &remainingTraversals) - } - if err != nil { - return err - } - - err = fn(root, d) - d.DecRef(ctx) - return err -} - -func BenchmarkVFS1TmpfsStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - - // Create VFS. - tmpfsFS, ok := fs.FindFilesystem("tmpfs") - if !ok { - b.Fatalf("failed to find tmpfs filesystem type") - } - rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - mntns, err := fs.NewMountNamespace(ctx, rootInode) - if err != nil { - b.Fatalf("failed to create mount namespace: %v", err) - } - defer mntns.DecRef(ctx) - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - root := mntns.Root() - defer root.DecRef(ctx) - d := root - d.IncRef() - defer d.DecRef(ctx) - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - next, err := d.Walk(ctx, root, name) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - d.DecRef(ctx) - d = next - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644)) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - file.DecRef(ctx) - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - dirPath := false - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { - if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return linuxerr.ENOTDIR - } - uattr, err := d.Inode.UnstableAttr(ctx) - if err != nil { - return err - } - // Sanity check. - if uattr.Perms.User.Execute { - b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode()) - } - return nil - }) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func BenchmarkVFS2TmpfsStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - b.Fatalf("VFS init: %v", err) - } - vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.MountOptions{}) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - defer mntns.DecRef(ctx) - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - root := mntns.Root() - root.IncRef() - defer root.DecRef(ctx) - vd := root - vd.IncRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - pop := vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(name), - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - vd.DecRef(ctx) - vd = nextVD - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(filename), - FollowFinalSymlink: true, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: 0644, - }) - vd.DecRef(ctx) - vd = vfs.VirtualDentry{} - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - defer fd.DecRef(ctx) - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(filePath), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - // Sanity check. - if stat.Mode&^linux.S_IFMT != 0644 { - b.Fatalf("got wrong permissions (%0o)", stat.Mode) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func BenchmarkVFS1TmpfsMountStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - - // Create VFS. - tmpfsFS, ok := fs.FindFilesystem("tmpfs") - if !ok { - b.Fatalf("failed to find tmpfs filesystem type") - } - rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - mntns, err := fs.NewMountNamespace(ctx, rootInode) - if err != nil { - b.Fatalf("failed to create mount namespace: %v", err) - } - defer mntns.DecRef(ctx) - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create and mount the submount. - root := mntns.Root() - defer root.DecRef(ctx) - if err := root.Inode.CreateDirectory(ctx, root, mountPointName, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create mount point: %v", err) - } - mountPoint, err := root.Walk(ctx, root, mountPointName) - if err != nil { - b.Fatalf("failed to walk to mount point: %v", err) - } - defer mountPoint.DecRef(ctx) - submountInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs submount: %v", err) - } - if err := mntns.Mount(ctx, mountPoint, submountInode); err != nil { - b.Fatalf("failed to mount tmpfs submount: %v", err) - } - filePathBuilder.WriteString(mountPointName) - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - d, err := root.Walk(ctx, root, mountPointName) - if err != nil { - b.Fatalf("failed to walk to mount root: %v", err) - } - defer d.DecRef(ctx) - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - next, err := d.Walk(ctx, root, name) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - d.DecRef(ctx) - d = next - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644)) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - file.DecRef(ctx) - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - dirPath := false - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { - if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return linuxerr.ENOTDIR - } - uattr, err := d.Inode.UnstableAttr(ctx) - if err != nil { - return err - } - // Sanity check. - if uattr.Perms.User.Execute { - b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode()) - } - return nil - }) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func BenchmarkVFS2TmpfsMountStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - b.Fatalf("VFS init: %v", err) - } - vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.MountOptions{}) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - defer mntns.DecRef(ctx) - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create the mount point. - root := mntns.Root() - root.IncRef() - defer root.DecRef(ctx) - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(mountPointName), - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create mount point: %v", err) - } - // Save the mount point for later use. - mountPoint, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to mount point: %v", err) - } - defer mountPoint.DecRef(ctx) - // Create and mount the submount. - if _, err := vfsObj.MountAt(ctx, creds, "", &pop, "tmpfs", &vfs.MountOptions{}); err != nil { - b.Fatalf("failed to mount tmpfs submount: %v", err) - } - filePathBuilder.WriteString(mountPointName) - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - vd, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to mount root: %v", err) - } - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - pop := vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(name), - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - vd.DecRef(ctx) - vd = nextVD - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(filename), - FollowFinalSymlink: true, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: 0644, - }) - vd.DecRef(ctx) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - fd.DecRef(ctx) - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(filePath), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - // Sanity check. - if stat.Mode&^linux.S_IFMT != 0644 { - b.Fatalf("got wrong permissions (%0o)", stat.Mode) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func init() { - // Turn off reference leak checking for a fair comparison between vfs1 and - // vfs2. - refs.SetLeakMode(refs.NoLeakChecking) -} diff --git a/pkg/sentry/fsimpl/tmpfs/dentry_list.go b/pkg/sentry/fsimpl/tmpfs/dentry_list.go new file mode 100644 index 000000000..b95dd7101 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/dentry_list.go @@ -0,0 +1,221 @@ +package tmpfs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type dentryElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (dentryElementMapper) linkerFor(elem *dentry) *dentry { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type dentryList struct { + head *dentry + tail *dentry +} + +// Reset resets list l to the empty state. +func (l *dentryList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *dentryList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *dentryList) Front() *dentry { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *dentryList) Back() *dentry { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *dentryList) Len() (count int) { + for e := l.Front(); e != nil; e = (dentryElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *dentryList) PushFront(e *dentry) { + linker := dentryElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + dentryElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *dentryList) PushBack(e *dentry) { + linker := dentryElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + dentryElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *dentryList) PushBackList(m *dentryList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + dentryElementMapper{}.linkerFor(l.tail).SetNext(m.head) + dentryElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *dentryList) InsertAfter(b, e *dentry) { + bLinker := dentryElementMapper{}.linkerFor(b) + eLinker := dentryElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + dentryElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *dentryList) InsertBefore(a, e *dentry) { + aLinker := dentryElementMapper{}.linkerFor(a) + eLinker := dentryElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + dentryElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *dentryList) Remove(e *dentry) { + linker := dentryElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + dentryElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + dentryElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type dentryEntry struct { + next *dentry + prev *dentry +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *dentryEntry) Next() *dentry { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *dentryEntry) Prev() *dentry { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *dentryEntry) SetNext(elem *dentry) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *dentryEntry) SetPrev(elem *dentry) { + e.prev = elem +} diff --git a/pkg/sentry/fsimpl/tmpfs/fstree.go b/pkg/sentry/fsimpl/tmpfs/fstree.go new file mode 100644 index 000000000..d46351488 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/fstree.go @@ -0,0 +1,55 @@ +package tmpfs + +import ( + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// IsAncestorDentry returns true if d is an ancestor of d2; that is, d is +// either d2's parent or an ancestor of d2's parent. +func genericIsAncestorDentry(d, d2 *dentry) bool { + for d2 != nil { + if d2.parent == d { + return true + } + if d2.parent == d2 { + return false + } + d2 = d2.parent + } + return false +} + +// ParentOrSelf returns d.parent. If d.parent is nil, ParentOrSelf returns d. +func genericParentOrSelf(d *dentry) *dentry { + if d.parent != nil { + return d.parent + } + return d +} + +// PrependPath is a generic implementation of FilesystemImpl.PrependPath(). +func genericPrependPath(vfsroot vfs.VirtualDentry, mnt *vfs.Mount, d *dentry, b *fspath.Builder) error { + for { + if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() { + return vfs.PrependPathAtVFSRootError{} + } + if mnt != nil && &d.vfsd == mnt.Root() { + return nil + } + if d.parent == nil { + return vfs.PrependPathAtNonMountRootError{} + } + b.PrependComponent(d.name) + d = d.parent + } +} + +// DebugPathname returns a pathname to d relative to its filesystem root. +// DebugPathname does not correspond to any Linux function; it's used to +// generate dentry pathnames for debugging. +func genericDebugPathname(d *dentry) string { + var b fspath.Builder + _ = genericPrependPath(vfs.VirtualDentry{}, nil, d, &b) + return b.String() +} diff --git a/pkg/sentry/fsimpl/tmpfs/inode_refs.go b/pkg/sentry/fsimpl/tmpfs/inode_refs.go new file mode 100644 index 000000000..f0f032e0c --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/inode_refs.go @@ -0,0 +1,140 @@ +package tmpfs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const inodeenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var inodeobj *inode + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type inodeRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *inodeRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *inodeRefs) RefType() string { + return fmt.Sprintf("%T", inodeobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *inodeRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *inodeRefs) LogRefs() bool { + return inodeenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *inodeRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *inodeRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if inodeenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *inodeRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if inodeenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *inodeRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if inodeenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *inodeRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go deleted file mode 100644 index 418c7994e..000000000 --- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "bytes" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -const fileName = "mypipe" - -func TestSeparateFDs(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef(ctx) - - // Open the read side. This is done in a concurrently because opening - // One end the pipe blocks until the other end is opened. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - rfdchan := make(chan *vfs.FileDescription) - go func() { - openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY} - rfd, _ := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - rfdchan <- rfd - }() - - // Open the write side. - openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY} - wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) - } - defer wfd.DecRef(ctx) - - rfd, ok := <-rfdchan - if !ok { - t.Fatalf("failed to open pipe for reading %q", fileName) - } - defer rfd.DecRef(ctx) - - const msg = "vamos azul" - checkEmpty(ctx, t, rfd) - checkWrite(ctx, t, wfd, msg) - checkRead(ctx, t, rfd, msg) -} - -func TestNonblockingRead(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef(ctx) - - // Open the read side as nonblocking. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_NONBLOCK} - rfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for reading %q: %v", fileName, err) - } - defer rfd.DecRef(ctx) - - // Open the write side. - openOpts = vfs.OpenOptions{Flags: linux.O_WRONLY} - wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) - } - defer wfd.DecRef(ctx) - - const msg = "geh blau" - checkEmpty(ctx, t, rfd) - checkWrite(ctx, t, wfd, msg) - checkRead(ctx, t, rfd, msg) -} - -func TestNonblockingWriteError(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef(ctx) - - // Open the write side as nonblocking, which should return ENXIO. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK} - _, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if !linuxerr.Equals(linuxerr.ENXIO, err) { - t.Fatalf("expected ENXIO, but got error: %v", err) - } -} - -func TestSingleFD(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef(ctx) - - // Open the pipe as readable and writable. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - openOpts := vfs.OpenOptions{Flags: linux.O_RDWR} - fd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) - } - defer fd.DecRef(ctx) - - const msg = "forza blu" - checkEmpty(ctx, t, fd) - checkWrite(ctx, t, fd, msg) - checkRead(ctx, t, fd, msg) -} - -// setup creates a VFS with a pipe in the root directory at path fileName. The -// returned VirtualDentry must be DecRef()'d be the caller. It calls t.Fatal -// upon failure. -func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesystem, vfs.VirtualDentry) { - ctx := contexttest.Context(t) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - t.Fatalf("VFS init: %v", err) - } - vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.MountOptions{}) - if err != nil { - t.Fatalf("failed to create tmpfs root mount: %v", err) - } - - // Create the pipe. - root := mntns.Root() - root.IncRef() - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - } - mknodOpts := vfs.MknodOptions{Mode: linux.ModeNamedPipe | 0644} - if err := vfsObj.MknodAt(ctx, creds, &pop, &mknodOpts); err != nil { - t.Fatalf("failed to create file %q: %v", fileName, err) - } - - // Sanity check: the file pipe exists and has the correct mode. - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - t.Fatalf("stat(%q) failed: %v", fileName, err) - } - if stat.Mode&^linux.S_IFMT != 0644 { - t.Errorf("got wrong permissions (%0o)", stat.Mode) - } - if stat.Mode&linux.S_IFMT != linux.ModeNamedPipe { - t.Errorf("got wrong file type (%0o)", stat.Mode) - } - - return ctx, creds, vfsObj, root -} - -// checkEmpty calls t.Fatal if the pipe in fd is not empty. -func checkEmpty(ctx context.Context, t *testing.T, fd *vfs.FileDescription) { - readData := make([]byte, 1) - dst := usermem.BytesIOSequence(readData) - bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{}) - if err != syserror.ErrWouldBlock { - t.Fatalf("expected ErrWouldBlock reading from empty pipe %q, but got: %v", fileName, err) - } - if bytesRead != 0 { - t.Fatalf("expected to read 0 bytes, but got %d", bytesRead) - } -} - -// checkWrite calls t.Fatal if it fails to write all of msg to fd. -func checkWrite(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) { - writeData := []byte(msg) - src := usermem.BytesIOSequence(writeData) - bytesWritten, err := fd.Write(ctx, src, vfs.WriteOptions{}) - if err != nil { - t.Fatalf("error writing to pipe %q: %v", fileName, err) - } - if bytesWritten != int64(len(writeData)) { - t.Fatalf("expected to write %d bytes, but wrote %d", len(writeData), bytesWritten) - } -} - -// checkRead calls t.Fatal if it fails to read msg from fd. -func checkRead(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) { - readData := make([]byte, len(msg)) - dst := usermem.BytesIOSequence(readData) - bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{}) - if err != nil { - t.Fatalf("error reading from pipe %q: %v", fileName, err) - } - if bytesRead != int64(len(msg)) { - t.Fatalf("expected to read %d bytes, but got %d", len(msg), bytesRead) - } - if !bytes.Equal(readData, []byte(msg)) { - t.Fatalf("expected to read %q from pipe, but got %q", msg, string(readData)) - } -} diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go deleted file mode 100644 index 4393cc13b..000000000 --- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go +++ /dev/null @@ -1,349 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "bytes" - "fmt" - "io" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs/lock" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -// Test that we can write some data to a file and read it back.` -func TestSimpleWriteRead(t *testing.T) { - ctx := contexttest.Context(t) - fd, cleanup, err := newFileFD(ctx, 0644) - if err != nil { - t.Fatal(err) - } - defer cleanup() - - // Write. - data := []byte("foobarbaz") - n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) - if err != nil { - t.Fatalf("fd.Write failed: %v", err) - } - if n != int64(len(data)) { - t.Errorf("fd.Write got short write length %d, want %d", n, len(data)) - } - if got, want := fd.Impl().(*regularFileFD).off, int64(len(data)); got != want { - t.Errorf("fd.Write left offset at %d, want %d", got, want) - } - - // Seek back to beginning. - if _, err := fd.Seek(ctx, 0, linux.SEEK_SET); err != nil { - t.Fatalf("fd.Seek failed: %v", err) - } - if got, want := fd.Impl().(*regularFileFD).off, int64(0); got != want { - t.Errorf("fd.Seek(0) left offset at %d, want %d", got, want) - } - - // Read. - buf := make([]byte, len(data)) - n, err = fd.Read(ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) - if err != nil && err != io.EOF { - t.Fatalf("fd.Read failed: %v", err) - } - if n != int64(len(data)) { - t.Errorf("fd.Read got short read length %d, want %d", n, len(data)) - } - if got, want := string(buf), string(data); got != want { - t.Errorf("Read got %q want %s", got, want) - } - if got, want := fd.Impl().(*regularFileFD).off, int64(len(data)); got != want { - t.Errorf("fd.Write left offset at %d, want %d", got, want) - } -} - -func TestPWrite(t *testing.T) { - ctx := contexttest.Context(t) - fd, cleanup, err := newFileFD(ctx, 0644) - if err != nil { - t.Fatal(err) - } - defer cleanup() - - // Fill file with 1k 'a's. - data := bytes.Repeat([]byte{'a'}, 1000) - n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) - if err != nil { - t.Fatalf("fd.Write failed: %v", err) - } - if n != int64(len(data)) { - t.Errorf("fd.Write got short write length %d, want %d", n, len(data)) - } - - // Write "gVisor is awesome" at various offsets. - buf := []byte("gVisor is awesome") - offsets := []int{0, 1, 2, 10, 20, 50, 100, len(data) - 100, len(data) - 1, len(data), len(data) + 1} - for _, offset := range offsets { - name := fmt.Sprintf("PWrite offset=%d", offset) - t.Run(name, func(t *testing.T) { - n, err := fd.PWrite(ctx, usermem.BytesIOSequence(buf), int64(offset), vfs.WriteOptions{}) - if err != nil { - t.Errorf("fd.PWrite got err %v want nil", err) - } - if n != int64(len(buf)) { - t.Errorf("fd.PWrite got %d bytes want %d", n, len(buf)) - } - - // Update data to reflect expected file contents. - if len(data) < offset+len(buf) { - data = append(data, make([]byte, (offset+len(buf))-len(data))...) - } - copy(data[offset:], buf) - - // Read the whole file and compare with data. - readBuf := make([]byte, len(data)) - n, err = fd.PRead(ctx, usermem.BytesIOSequence(readBuf), 0, vfs.ReadOptions{}) - if err != nil { - t.Fatalf("fd.PRead failed: %v", err) - } - if n != int64(len(data)) { - t.Errorf("fd.PRead got short read length %d, want %d", n, len(data)) - } - if got, want := string(readBuf), string(data); got != want { - t.Errorf("PRead got %q want %s", got, want) - } - - }) - } -} - -func TestLocks(t *testing.T) { - ctx := contexttest.Context(t) - fd, cleanup, err := newFileFD(ctx, 0644) - if err != nil { - t.Fatal(err) - } - defer cleanup() - - uid1 := 123 - uid2 := 456 - if err := fd.Impl().LockBSD(ctx, uid1, 0 /* ownerPID */, lock.ReadLock, nil); err != nil { - t.Fatalf("fd.Impl().LockBSD failed: err = %v", err) - } - if err := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, nil); err != nil { - t.Fatalf("fd.Impl().LockBSD failed: err = %v", err) - } - if got, want := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.WriteLock, nil), syserror.ErrWouldBlock; got != want { - t.Fatalf("fd.Impl().LockBSD failed: got = %v, want = %v", got, want) - } - if err := fd.Impl().UnlockBSD(ctx, uid1); err != nil { - t.Fatalf("fd.Impl().UnlockBSD failed: err = %v", err) - } - if err := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.WriteLock, nil); err != nil { - t.Fatalf("fd.Impl().LockBSD failed: err = %v", err) - } - - if err := fd.Impl().LockPOSIX(ctx, uid1, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: 1}, nil); err != nil { - t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err) - } - if err := fd.Impl().LockPOSIX(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 1, End: 2}, nil); err != nil { - t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err) - } - if err := fd.Impl().LockPOSIX(ctx, uid1, 0 /* ownerPID */, lock.WriteLock, lock.LockRange{Start: 0, End: 1}, nil); err != nil { - t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err) - } - if got, want := fd.Impl().LockPOSIX(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: 1}, nil), syserror.ErrWouldBlock; got != want { - t.Fatalf("fd.Impl().LockPOSIX failed: got = %v, want = %v", got, want) - } - if err := fd.Impl().UnlockPOSIX(ctx, uid1, lock.LockRange{Start: 0, End: 1}); err != nil { - t.Fatalf("fd.Impl().UnlockPOSIX failed: err = %v", err) - } -} - -func TestPRead(t *testing.T) { - ctx := contexttest.Context(t) - fd, cleanup, err := newFileFD(ctx, 0644) - if err != nil { - t.Fatal(err) - } - defer cleanup() - - // Write 100 sequences of 'gVisor is awesome'. - data := bytes.Repeat([]byte("gVisor is awsome"), 100) - n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) - if err != nil { - t.Fatalf("fd.Write failed: %v", err) - } - if n != int64(len(data)) { - t.Errorf("fd.Write got short write length %d, want %d", n, len(data)) - } - - // Read various sizes from various offsets. - sizes := []int{0, 1, 2, 10, 20, 50, 100, 1000} - offsets := []int{0, 1, 2, 10, 20, 50, 100, 1000, len(data) - 100, len(data) - 1, len(data), len(data) + 1} - - for _, size := range sizes { - for _, offset := range offsets { - name := fmt.Sprintf("PRead offset=%d size=%d", offset, size) - t.Run(name, func(t *testing.T) { - var ( - wantRead []byte - wantErr error - ) - if offset < len(data) { - wantRead = data[offset:] - } else if size > 0 { - wantErr = io.EOF - } - if offset+size < len(data) { - wantRead = wantRead[:size] - } - buf := make([]byte, size) - n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), int64(offset), vfs.ReadOptions{}) - if err != wantErr { - t.Errorf("fd.PRead got err %v want %v", err, wantErr) - } - if n != int64(len(wantRead)) { - t.Errorf("fd.PRead got %d bytes want %d", n, len(wantRead)) - } - if got := string(buf[:n]); got != string(wantRead) { - t.Errorf("fd.PRead got %q want %q", got, string(wantRead)) - } - }) - } - } -} - -func TestTruncate(t *testing.T) { - ctx := contexttest.Context(t) - fd, cleanup, err := newFileFD(ctx, 0644) - if err != nil { - t.Fatal(err) - } - defer cleanup() - - // Fill the file with some data. - data := bytes.Repeat([]byte("gVisor is awsome"), 100) - written, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) - if err != nil { - t.Fatalf("fd.Write failed: %v", err) - } - - // Size should be same as written. - sizeStatOpts := vfs.StatOptions{Mask: linux.STATX_SIZE} - stat, err := fd.Stat(ctx, sizeStatOpts) - if err != nil { - t.Fatalf("fd.Stat failed: %v", err) - } - if got, want := int64(stat.Size), written; got != want { - t.Errorf("fd.Stat got size %d, want %d", got, want) - } - - // Truncate down. - newSize := uint64(10) - if err := fd.SetStat(ctx, vfs.SetStatOptions{ - Stat: linux.Statx{ - Mask: linux.STATX_SIZE, - Size: newSize, - }, - }); err != nil { - t.Errorf("fd.Truncate failed: %v", err) - } - // Size should be updated. - statAfterTruncateDown, err := fd.Stat(ctx, sizeStatOpts) - if err != nil { - t.Fatalf("fd.Stat failed: %v", err) - } - if got, want := statAfterTruncateDown.Size, newSize; got != want { - t.Errorf("fd.Stat got size %d, want %d", got, want) - } - // We should only read newSize worth of data. - buf := make([]byte, 1000) - if n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), 0, vfs.ReadOptions{}); err != nil && err != io.EOF { - t.Fatalf("fd.PRead failed: %v", err) - } else if uint64(n) != newSize { - t.Errorf("fd.PRead got size %d, want %d", n, newSize) - } - // Mtime and Ctime should be bumped. - if got := statAfterTruncateDown.Mtime.ToNsec(); got <= stat.Mtime.ToNsec() { - t.Errorf("fd.Stat got Mtime %v, want > %v", got, stat.Mtime) - } - if got := statAfterTruncateDown.Ctime.ToNsec(); got <= stat.Ctime.ToNsec() { - t.Errorf("fd.Stat got Ctime %v, want > %v", got, stat.Ctime) - } - - // Truncate up. - newSize = 100 - if err := fd.SetStat(ctx, vfs.SetStatOptions{ - Stat: linux.Statx{ - Mask: linux.STATX_SIZE, - Size: newSize, - }, - }); err != nil { - t.Errorf("fd.Truncate failed: %v", err) - } - // Size should be updated. - statAfterTruncateUp, err := fd.Stat(ctx, sizeStatOpts) - if err != nil { - t.Fatalf("fd.Stat failed: %v", err) - } - if got, want := statAfterTruncateUp.Size, newSize; got != want { - t.Errorf("fd.Stat got size %d, want %d", got, want) - } - // We should read newSize worth of data. - buf = make([]byte, 1000) - if n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), 0, vfs.ReadOptions{}); err != nil && err != io.EOF { - t.Fatalf("fd.PRead failed: %v", err) - } else if uint64(n) != newSize { - t.Errorf("fd.PRead got size %d, want %d", n, newSize) - } - // Bytes should be null after 10, since we previously truncated to 10. - for i := uint64(10); i < newSize; i++ { - if buf[i] != 0 { - t.Errorf("fd.PRead got byte %d=%x, want 0", i, buf[i]) - break - } - } - // Mtime and Ctime should be bumped. - if got := statAfterTruncateUp.Mtime.ToNsec(); got <= statAfterTruncateDown.Mtime.ToNsec() { - t.Errorf("fd.Stat got Mtime %v, want > %v", got, statAfterTruncateDown.Mtime) - } - if got := statAfterTruncateUp.Ctime.ToNsec(); got <= statAfterTruncateDown.Ctime.ToNsec() { - t.Errorf("fd.Stat got Ctime %v, want > %v", got, stat.Ctime) - } - - // Truncate to the current size. - newSize = statAfterTruncateUp.Size - if err := fd.SetStat(ctx, vfs.SetStatOptions{ - Stat: linux.Statx{ - Mask: linux.STATX_SIZE, - Size: newSize, - }, - }); err != nil { - t.Errorf("fd.Truncate failed: %v", err) - } - statAfterTruncateNoop, err := fd.Stat(ctx, sizeStatOpts) - if err != nil { - t.Fatalf("fd.Stat failed: %v", err) - } - // Mtime and Ctime should not be bumped, since operation is a noop. - if got := statAfterTruncateNoop.Mtime.ToNsec(); got != statAfterTruncateUp.Mtime.ToNsec() { - t.Errorf("fd.Stat got Mtime %v, want %v", got, statAfterTruncateUp.Mtime) - } - if got := statAfterTruncateNoop.Ctime.ToNsec(); got != statAfterTruncateUp.Ctime.ToNsec() { - t.Errorf("fd.Stat got Ctime %v, want %v", got, statAfterTruncateUp.Ctime) - } -} diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go deleted file mode 100644 index f7ee4aab2..000000000 --- a/pkg/sentry/fsimpl/tmpfs/stat_test.go +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "fmt" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -func TestStatAfterCreate(t *testing.T) { - ctx := contexttest.Context(t) - mode := linux.FileMode(0644) - - // Run with different file types. - for _, typ := range []string{"file", "dir", "pipe"} { - t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) { - var ( - fd *vfs.FileDescription - cleanup func() - err error - ) - switch typ { - case "file": - fd, cleanup, err = newFileFD(ctx, mode) - case "dir": - fd, cleanup, err = newDirFD(ctx, mode) - case "pipe": - fd, cleanup, err = newPipeFD(ctx, mode) - default: - panic(fmt.Sprintf("unknown typ %q", typ)) - } - if err != nil { - t.Fatal(err) - } - defer cleanup() - - got, err := fd.Stat(ctx, vfs.StatOptions{}) - if err != nil { - t.Fatalf("Stat failed: %v", err) - } - - // Atime, Ctime, Mtime should all be current time (non-zero). - atime, ctime, mtime := got.Atime.ToNsec(), got.Ctime.ToNsec(), got.Mtime.ToNsec() - if atime != ctime || ctime != mtime { - t.Errorf("got atime=%d ctime=%d mtime=%d, wanted equal values", atime, ctime, mtime) - } - if atime == 0 { - t.Errorf("got atime=%d, want non-zero", atime) - } - - // Btime should be 0, as it is not set by tmpfs. - if btime := got.Btime.ToNsec(); btime != 0 { - t.Errorf("got btime %d, want 0", got.Btime.ToNsec()) - } - - // Size should be 0 (except for directories, which make up a size - // of 20 per entry, including the "." and ".." entries present in - // otherwise-empty directories). - wantSize := uint64(0) - if typ == "dir" { - wantSize = 40 - } - if got.Size != wantSize { - t.Errorf("got size %d, want %d", got.Size, wantSize) - } - - // Nlink should be 1 for files, 2 for dirs. - wantNlink := uint32(1) - if typ == "dir" { - wantNlink = 2 - } - if got.Nlink != wantNlink { - t.Errorf("got nlink %d, want %d", got.Nlink, wantNlink) - } - - // UID and GID are set from context creds. - creds := auth.CredentialsFromContext(ctx) - if got.UID != uint32(creds.EffectiveKUID) { - t.Errorf("got uid %d, want %d", got.UID, uint32(creds.EffectiveKUID)) - } - if got.GID != uint32(creds.EffectiveKGID) { - t.Errorf("got gid %d, want %d", got.GID, uint32(creds.EffectiveKGID)) - } - - // Mode. - wantMode := uint16(mode) - switch typ { - case "file": - wantMode |= linux.S_IFREG - case "dir": - wantMode |= linux.S_IFDIR - case "pipe": - wantMode |= linux.S_IFIFO - default: - panic(fmt.Sprintf("unknown typ %q", typ)) - } - - if got.Mode != wantMode { - t.Errorf("got mode %x, want %x", got.Mode, wantMode) - } - - // Ino. - if got.Ino == 0 { - t.Errorf("got ino %d, want not 0", got.Ino) - } - }) - } -} - -func TestSetStatAtime(t *testing.T) { - ctx := contexttest.Context(t) - fd, cleanup, err := newFileFD(ctx, 0644) - if err != nil { - t.Fatal(err) - } - defer cleanup() - - allStatOptions := vfs.StatOptions{Mask: linux.STATX_ALL} - - // Get initial stat. - initialStat, err := fd.Stat(ctx, allStatOptions) - if err != nil { - t.Fatalf("Stat failed: %v", err) - } - - // Set atime, but without the mask. - if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: linux.Statx{ - Mask: 0, - Atime: linux.NsecToStatxTimestamp(100), - }}); err != nil { - t.Errorf("SetStat atime without mask failed: %v", err) - } - // Atime should be unchanged. - if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { - t.Errorf("Stat got error: %v", err) - } else if gotStat.Atime != initialStat.Atime { - t.Errorf("Stat got atime %d, want %d", gotStat.Atime, initialStat.Atime) - } - - // Set atime, this time included in the mask. - setStat := linux.Statx{ - Mask: linux.STATX_ATIME, - Atime: linux.NsecToStatxTimestamp(100), - } - if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil { - t.Errorf("SetStat atime with mask failed: %v", err) - } - if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { - t.Errorf("Stat got error: %v", err) - } else if gotStat.Atime != setStat.Atime { - t.Errorf("Stat got atime %d, want %d", gotStat.Atime, setStat.Atime) - } -} - -func TestSetStat(t *testing.T) { - ctx := contexttest.Context(t) - mode := linux.FileMode(0644) - - // Run with different file types. - for _, typ := range []string{"file", "dir", "pipe"} { - t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) { - var ( - fd *vfs.FileDescription - cleanup func() - err error - ) - switch typ { - case "file": - fd, cleanup, err = newFileFD(ctx, mode) - case "dir": - fd, cleanup, err = newDirFD(ctx, mode) - case "pipe": - fd, cleanup, err = newPipeFD(ctx, mode) - default: - panic(fmt.Sprintf("unknown typ %q", typ)) - } - if err != nil { - t.Fatal(err) - } - defer cleanup() - - allStatOptions := vfs.StatOptions{Mask: linux.STATX_ALL} - - // Get initial stat. - initialStat, err := fd.Stat(ctx, allStatOptions) - if err != nil { - t.Fatalf("Stat failed: %v", err) - } - - // Set atime, but without the mask. - if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: linux.Statx{ - Mask: 0, - Atime: linux.NsecToStatxTimestamp(100), - }}); err != nil { - t.Errorf("SetStat atime without mask failed: %v", err) - } - // Atime should be unchanged. - if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { - t.Errorf("Stat got error: %v", err) - } else if gotStat.Atime != initialStat.Atime { - t.Errorf("Stat got atime %d, want %d", gotStat.Atime, initialStat.Atime) - } - - // Set atime, this time included in the mask. - setStat := linux.Statx{ - Mask: linux.STATX_ATIME, - Atime: linux.NsecToStatxTimestamp(100), - } - if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil { - t.Errorf("SetStat atime with mask failed: %v", err) - } - if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { - t.Errorf("Stat got error: %v", err) - } else if gotStat.Atime != setStat.Atime { - t.Errorf("Stat got atime %d, want %d", gotStat.Atime, setStat.Atime) - } - }) - } -} diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs_state_autogen.go b/pkg/sentry/fsimpl/tmpfs/tmpfs_state_autogen.go new file mode 100644 index 000000000..6d9f09eef --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs_state_autogen.go @@ -0,0 +1,593 @@ +// automatically generated by stateify. + +package tmpfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (l *dentryList) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.dentryList" +} + +func (l *dentryList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *dentryList) beforeSave() {} + +// +checklocksignore +func (l *dentryList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *dentryList) afterLoad() {} + +// +checklocksignore +func (l *dentryList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *dentryEntry) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.dentryEntry" +} + +func (e *dentryEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *dentryEntry) beforeSave() {} + +// +checklocksignore +func (e *dentryEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *dentryEntry) afterLoad() {} + +// +checklocksignore +func (e *dentryEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (d *deviceFile) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.deviceFile" +} + +func (d *deviceFile) StateFields() []string { + return []string{ + "inode", + "kind", + "major", + "minor", + } +} + +func (d *deviceFile) beforeSave() {} + +// +checklocksignore +func (d *deviceFile) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.inode) + stateSinkObject.Save(1, &d.kind) + stateSinkObject.Save(2, &d.major) + stateSinkObject.Save(3, &d.minor) +} + +func (d *deviceFile) afterLoad() {} + +// +checklocksignore +func (d *deviceFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.inode) + stateSourceObject.Load(1, &d.kind) + stateSourceObject.Load(2, &d.major) + stateSourceObject.Load(3, &d.minor) +} + +func (dir *directory) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.directory" +} + +func (dir *directory) StateFields() []string { + return []string{ + "dentry", + "inode", + "childMap", + "numChildren", + "childList", + } +} + +func (dir *directory) beforeSave() {} + +// +checklocksignore +func (dir *directory) StateSave(stateSinkObject state.Sink) { + dir.beforeSave() + stateSinkObject.Save(0, &dir.dentry) + stateSinkObject.Save(1, &dir.inode) + stateSinkObject.Save(2, &dir.childMap) + stateSinkObject.Save(3, &dir.numChildren) + stateSinkObject.Save(4, &dir.childList) +} + +func (dir *directory) afterLoad() {} + +// +checklocksignore +func (dir *directory) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &dir.dentry) + stateSourceObject.Load(1, &dir.inode) + stateSourceObject.Load(2, &dir.childMap) + stateSourceObject.Load(3, &dir.numChildren) + stateSourceObject.Load(4, &dir.childList) +} + +func (fd *directoryFD) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.directoryFD" +} + +func (fd *directoryFD) StateFields() []string { + return []string{ + "fileDescription", + "DirectoryFileDescriptionDefaultImpl", + "iter", + "off", + } +} + +func (fd *directoryFD) beforeSave() {} + +// +checklocksignore +func (fd *directoryFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.fileDescription) + stateSinkObject.Save(1, &fd.DirectoryFileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.iter) + stateSinkObject.Save(3, &fd.off) +} + +func (fd *directoryFD) afterLoad() {} + +// +checklocksignore +func (fd *directoryFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.fileDescription) + stateSourceObject.Load(1, &fd.DirectoryFileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.iter) + stateSourceObject.Load(3, &fd.off) +} + +func (r *inodeRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.inodeRefs" +} + +func (r *inodeRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *inodeRefs) beforeSave() {} + +// +checklocksignore +func (r *inodeRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *inodeRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (n *namedPipe) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.namedPipe" +} + +func (n *namedPipe) StateFields() []string { + return []string{ + "inode", + "pipe", + } +} + +func (n *namedPipe) beforeSave() {} + +// +checklocksignore +func (n *namedPipe) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.inode) + stateSinkObject.Save(1, &n.pipe) +} + +func (n *namedPipe) afterLoad() {} + +// +checklocksignore +func (n *namedPipe) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.inode) + stateSourceObject.Load(1, &n.pipe) +} + +func (rf *regularFile) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.regularFile" +} + +func (rf *regularFile) StateFields() []string { + return []string{ + "inode", + "memoryUsageKind", + "mappings", + "writableMappingPages", + "data", + "seals", + "size", + } +} + +func (rf *regularFile) beforeSave() {} + +// +checklocksignore +func (rf *regularFile) StateSave(stateSinkObject state.Sink) { + rf.beforeSave() + stateSinkObject.Save(0, &rf.inode) + stateSinkObject.Save(1, &rf.memoryUsageKind) + stateSinkObject.Save(2, &rf.mappings) + stateSinkObject.Save(3, &rf.writableMappingPages) + stateSinkObject.Save(4, &rf.data) + stateSinkObject.Save(5, &rf.seals) + stateSinkObject.Save(6, &rf.size) +} + +// +checklocksignore +func (rf *regularFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &rf.inode) + stateSourceObject.Load(1, &rf.memoryUsageKind) + stateSourceObject.Load(2, &rf.mappings) + stateSourceObject.Load(3, &rf.writableMappingPages) + stateSourceObject.Load(4, &rf.data) + stateSourceObject.Load(5, &rf.seals) + stateSourceObject.Load(6, &rf.size) + stateSourceObject.AfterLoad(rf.afterLoad) +} + +func (fd *regularFileFD) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.regularFileFD" +} + +func (fd *regularFileFD) StateFields() []string { + return []string{ + "fileDescription", + "off", + } +} + +func (fd *regularFileFD) beforeSave() {} + +// +checklocksignore +func (fd *regularFileFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.fileDescription) + stateSinkObject.Save(1, &fd.off) +} + +func (fd *regularFileFD) afterLoad() {} + +// +checklocksignore +func (fd *regularFileFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.fileDescription) + stateSourceObject.Load(1, &fd.off) +} + +func (s *socketFile) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.socketFile" +} + +func (s *socketFile) StateFields() []string { + return []string{ + "inode", + "ep", + } +} + +func (s *socketFile) beforeSave() {} + +// +checklocksignore +func (s *socketFile) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.inode) + stateSinkObject.Save(1, &s.ep) +} + +func (s *socketFile) afterLoad() {} + +// +checklocksignore +func (s *socketFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.inode) + stateSourceObject.Load(1, &s.ep) +} + +func (s *symlink) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.symlink" +} + +func (s *symlink) StateFields() []string { + return []string{ + "inode", + "target", + } +} + +func (s *symlink) beforeSave() {} + +// +checklocksignore +func (s *symlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.inode) + stateSinkObject.Save(1, &s.target) +} + +func (s *symlink) afterLoad() {} + +// +checklocksignore +func (s *symlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.inode) + stateSourceObject.Load(1, &s.target) +} + +func (fstype *FilesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.FilesystemType" +} + +func (fstype *FilesystemType) StateFields() []string { + return []string{} +} + +func (fstype *FilesystemType) beforeSave() {} + +// +checklocksignore +func (fstype *FilesystemType) StateSave(stateSinkObject state.Sink) { + fstype.beforeSave() +} + +func (fstype *FilesystemType) afterLoad() {} + +// +checklocksignore +func (fstype *FilesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "vfsfs", + "mfp", + "clock", + "devMinor", + "mopts", + "nextInoMinusOne", + "root", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.vfsfs) + stateSinkObject.Save(1, &fs.mfp) + stateSinkObject.Save(2, &fs.clock) + stateSinkObject.Save(3, &fs.devMinor) + stateSinkObject.Save(4, &fs.mopts) + stateSinkObject.Save(5, &fs.nextInoMinusOne) + stateSinkObject.Save(6, &fs.root) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.vfsfs) + stateSourceObject.Load(1, &fs.mfp) + stateSourceObject.Load(2, &fs.clock) + stateSourceObject.Load(3, &fs.devMinor) + stateSourceObject.Load(4, &fs.mopts) + stateSourceObject.Load(5, &fs.nextInoMinusOne) + stateSourceObject.Load(6, &fs.root) +} + +func (f *FilesystemOpts) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.FilesystemOpts" +} + +func (f *FilesystemOpts) StateFields() []string { + return []string{ + "RootFileType", + "RootSymlinkTarget", + "FilesystemType", + } +} + +func (f *FilesystemOpts) beforeSave() {} + +// +checklocksignore +func (f *FilesystemOpts) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.RootFileType) + stateSinkObject.Save(1, &f.RootSymlinkTarget) + stateSinkObject.Save(2, &f.FilesystemType) +} + +func (f *FilesystemOpts) afterLoad() {} + +// +checklocksignore +func (f *FilesystemOpts) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.RootFileType) + stateSourceObject.Load(1, &f.RootSymlinkTarget) + stateSourceObject.Load(2, &f.FilesystemType) +} + +func (d *dentry) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.dentry" +} + +func (d *dentry) StateFields() []string { + return []string{ + "vfsd", + "parent", + "name", + "dentryEntry", + "inode", + } +} + +func (d *dentry) beforeSave() {} + +// +checklocksignore +func (d *dentry) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.vfsd) + stateSinkObject.Save(1, &d.parent) + stateSinkObject.Save(2, &d.name) + stateSinkObject.Save(3, &d.dentryEntry) + stateSinkObject.Save(4, &d.inode) +} + +func (d *dentry) afterLoad() {} + +// +checklocksignore +func (d *dentry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.vfsd) + stateSourceObject.Load(1, &d.parent) + stateSourceObject.Load(2, &d.name) + stateSourceObject.Load(3, &d.dentryEntry) + stateSourceObject.Load(4, &d.inode) +} + +func (i *inode) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.inode" +} + +func (i *inode) StateFields() []string { + return []string{ + "fs", + "refs", + "xattrs", + "mode", + "nlink", + "uid", + "gid", + "ino", + "atime", + "ctime", + "mtime", + "locks", + "watches", + "impl", + } +} + +func (i *inode) beforeSave() {} + +// +checklocksignore +func (i *inode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.fs) + stateSinkObject.Save(1, &i.refs) + stateSinkObject.Save(2, &i.xattrs) + stateSinkObject.Save(3, &i.mode) + stateSinkObject.Save(4, &i.nlink) + stateSinkObject.Save(5, &i.uid) + stateSinkObject.Save(6, &i.gid) + stateSinkObject.Save(7, &i.ino) + stateSinkObject.Save(8, &i.atime) + stateSinkObject.Save(9, &i.ctime) + stateSinkObject.Save(10, &i.mtime) + stateSinkObject.Save(11, &i.locks) + stateSinkObject.Save(12, &i.watches) + stateSinkObject.Save(13, &i.impl) +} + +func (i *inode) afterLoad() {} + +// +checklocksignore +func (i *inode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.fs) + stateSourceObject.Load(1, &i.refs) + stateSourceObject.Load(2, &i.xattrs) + stateSourceObject.Load(3, &i.mode) + stateSourceObject.Load(4, &i.nlink) + stateSourceObject.Load(5, &i.uid) + stateSourceObject.Load(6, &i.gid) + stateSourceObject.Load(7, &i.ino) + stateSourceObject.Load(8, &i.atime) + stateSourceObject.Load(9, &i.ctime) + stateSourceObject.Load(10, &i.mtime) + stateSourceObject.Load(11, &i.locks) + stateSourceObject.Load(12, &i.watches) + stateSourceObject.Load(13, &i.impl) +} + +func (fd *fileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/tmpfs.fileDescription" +} + +func (fd *fileDescription) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "LockFD", + } +} + +func (fd *fileDescription) beforeSave() {} + +// +checklocksignore +func (fd *fileDescription) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.LockFD) +} + +func (fd *fileDescription) afterLoad() {} + +// +checklocksignore +func (fd *fileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.LockFD) +} + +func init() { + state.Register((*dentryList)(nil)) + state.Register((*dentryEntry)(nil)) + state.Register((*deviceFile)(nil)) + state.Register((*directory)(nil)) + state.Register((*directoryFD)(nil)) + state.Register((*inodeRefs)(nil)) + state.Register((*namedPipe)(nil)) + state.Register((*regularFile)(nil)) + state.Register((*regularFileFD)(nil)) + state.Register((*socketFile)(nil)) + state.Register((*symlink)(nil)) + state.Register((*FilesystemType)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*FilesystemOpts)(nil)) + state.Register((*dentry)(nil)) + state.Register((*inode)(nil)) + state.Register((*fileDescription)(nil)) +} diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go deleted file mode 100644 index fc5323abc..000000000 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "fmt" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// nextFileID is used to generate unique file names. -var nextFileID int64 - -// newTmpfsRoot creates a new tmpfs mount, and returns the root. If the error -// is not nil, then cleanup should be called when the root is no longer needed. -func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) { - creds := auth.CredentialsFromContext(ctx) - - vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err) - } - - vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.MountOptions{}) - if err != nil { - return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err) - } - root := mntns.Root() - root.IncRef() - return vfsObj, root, func() { - root.DecRef(ctx) - mntns.DecRef(ctx) - }, nil -} - -// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If -// the returned err is not nil, then cleanup should be called when the FD is no -// longer needed. -func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { - creds := auth.CredentialsFromContext(ctx) - vfsObj, root, cleanup, err := newTmpfsRoot(ctx) - if err != nil { - return nil, nil, err - } - - filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1)) - - // Create the file that will be write/read. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(filename), - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: linux.ModeRegular | mode, - }) - if err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err) - } - - return fd, cleanup, nil -} - -// newDirFD is like newFileFD, but for directories. -func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { - creds := auth.CredentialsFromContext(ctx) - vfsObj, root, cleanup, err := newTmpfsRoot(ctx) - if err != nil { - return nil, nil, err - } - - dirname := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1)) - - // Create the dir. - if err := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(dirname), - }, &vfs.MkdirOptions{ - Mode: linux.ModeDirectory | mode, - }); err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to create directory %q: %v", dirname, err) - } - - // Open the dir and return it. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(dirname), - }, &vfs.OpenOptions{ - Flags: linux.O_RDONLY | linux.O_DIRECTORY, - }) - if err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to open directory %q: %v", dirname, err) - } - - return fd, cleanup, nil -} - -// newPipeFD is like newFileFD, but for pipes. -func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { - creds := auth.CredentialsFromContext(ctx) - vfsObj, root, cleanup, err := newTmpfsRoot(ctx) - if err != nil { - return nil, nil, err - } - - name := fmt.Sprintf("tmpfs-test-%d", atomic.AddInt64(&nextFileID, 1)) - - if err := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(name), - }, &vfs.MknodOptions{ - Mode: linux.ModeNamedPipe | mode, - }); err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to create pipe %q: %v", name, err) - } - - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(name), - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR, - }) - if err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to open pipe %q: %v", name, err) - } - - return fd, cleanup, nil -} diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD deleted file mode 100644 index 1d855234c..000000000 --- a/pkg/sentry/fsimpl/verity/BUILD +++ /dev/null @@ -1,55 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -licenses(["notice"]) - -go_library( - name = "verity", - srcs = [ - "filesystem.go", - "save_restore.go", - "verity.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/hostarch", - "//pkg/marshal/primitive", - "//pkg/merkletree", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/fs/lock", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - ], -) - -go_test( - name = "verity_test", - srcs = [ - "verity_test.go", - ], - library = ":verity", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/sentry/arch", - "//pkg/sentry/fsimpl/testutil", - "//pkg/sentry/fsimpl/tmpfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fsimpl/verity/verity_state_autogen.go b/pkg/sentry/fsimpl/verity/verity_state_autogen.go new file mode 100644 index 000000000..dba01a68e --- /dev/null +++ b/pkg/sentry/fsimpl/verity/verity_state_autogen.go @@ -0,0 +1,240 @@ +// automatically generated by stateify. + +package verity + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (fstype *FilesystemType) StateTypeName() string { + return "pkg/sentry/fsimpl/verity.FilesystemType" +} + +func (fstype *FilesystemType) StateFields() []string { + return []string{} +} + +func (fstype *FilesystemType) beforeSave() {} + +// +checklocksignore +func (fstype *FilesystemType) StateSave(stateSinkObject state.Sink) { + fstype.beforeSave() +} + +func (fstype *FilesystemType) afterLoad() {} + +// +checklocksignore +func (fstype *FilesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (fs *filesystem) StateTypeName() string { + return "pkg/sentry/fsimpl/verity.filesystem" +} + +func (fs *filesystem) StateFields() []string { + return []string{ + "vfsfs", + "creds", + "allowRuntimeEnable", + "lowerMount", + "rootDentry", + "alg", + "action", + "opts", + } +} + +func (fs *filesystem) beforeSave() {} + +// +checklocksignore +func (fs *filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.vfsfs) + stateSinkObject.Save(1, &fs.creds) + stateSinkObject.Save(2, &fs.allowRuntimeEnable) + stateSinkObject.Save(3, &fs.lowerMount) + stateSinkObject.Save(4, &fs.rootDentry) + stateSinkObject.Save(5, &fs.alg) + stateSinkObject.Save(6, &fs.action) + stateSinkObject.Save(7, &fs.opts) +} + +func (fs *filesystem) afterLoad() {} + +// +checklocksignore +func (fs *filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.vfsfs) + stateSourceObject.Load(1, &fs.creds) + stateSourceObject.Load(2, &fs.allowRuntimeEnable) + stateSourceObject.Load(3, &fs.lowerMount) + stateSourceObject.Load(4, &fs.rootDentry) + stateSourceObject.Load(5, &fs.alg) + stateSourceObject.Load(6, &fs.action) + stateSourceObject.Load(7, &fs.opts) +} + +func (i *InternalFilesystemOptions) StateTypeName() string { + return "pkg/sentry/fsimpl/verity.InternalFilesystemOptions" +} + +func (i *InternalFilesystemOptions) StateFields() []string { + return []string{ + "LowerName", + "Alg", + "AllowRuntimeEnable", + "LowerGetFSOptions", + "Action", + } +} + +func (i *InternalFilesystemOptions) beforeSave() {} + +// +checklocksignore +func (i *InternalFilesystemOptions) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.LowerName) + stateSinkObject.Save(1, &i.Alg) + stateSinkObject.Save(2, &i.AllowRuntimeEnable) + stateSinkObject.Save(3, &i.LowerGetFSOptions) + stateSinkObject.Save(4, &i.Action) +} + +func (i *InternalFilesystemOptions) afterLoad() {} + +// +checklocksignore +func (i *InternalFilesystemOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.LowerName) + stateSourceObject.Load(1, &i.Alg) + stateSourceObject.Load(2, &i.AllowRuntimeEnable) + stateSourceObject.Load(3, &i.LowerGetFSOptions) + stateSourceObject.Load(4, &i.Action) +} + +func (d *dentry) StateTypeName() string { + return "pkg/sentry/fsimpl/verity.dentry" +} + +func (d *dentry) StateFields() []string { + return []string{ + "vfsd", + "refs", + "fs", + "mode", + "uid", + "gid", + "size", + "parent", + "name", + "children", + "childrenNames", + "childrenList", + "lowerVD", + "lowerMerkleVD", + "symlinkTarget", + "hash", + } +} + +func (d *dentry) beforeSave() {} + +// +checklocksignore +func (d *dentry) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.vfsd) + stateSinkObject.Save(1, &d.refs) + stateSinkObject.Save(2, &d.fs) + stateSinkObject.Save(3, &d.mode) + stateSinkObject.Save(4, &d.uid) + stateSinkObject.Save(5, &d.gid) + stateSinkObject.Save(6, &d.size) + stateSinkObject.Save(7, &d.parent) + stateSinkObject.Save(8, &d.name) + stateSinkObject.Save(9, &d.children) + stateSinkObject.Save(10, &d.childrenNames) + stateSinkObject.Save(11, &d.childrenList) + stateSinkObject.Save(12, &d.lowerVD) + stateSinkObject.Save(13, &d.lowerMerkleVD) + stateSinkObject.Save(14, &d.symlinkTarget) + stateSinkObject.Save(15, &d.hash) +} + +// +checklocksignore +func (d *dentry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.vfsd) + stateSourceObject.Load(1, &d.refs) + stateSourceObject.Load(2, &d.fs) + stateSourceObject.Load(3, &d.mode) + stateSourceObject.Load(4, &d.uid) + stateSourceObject.Load(5, &d.gid) + stateSourceObject.Load(6, &d.size) + stateSourceObject.Load(7, &d.parent) + stateSourceObject.Load(8, &d.name) + stateSourceObject.Load(9, &d.children) + stateSourceObject.Load(10, &d.childrenNames) + stateSourceObject.Load(11, &d.childrenList) + stateSourceObject.Load(12, &d.lowerVD) + stateSourceObject.Load(13, &d.lowerMerkleVD) + stateSourceObject.Load(14, &d.symlinkTarget) + stateSourceObject.Load(15, &d.hash) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (fd *fileDescription) StateTypeName() string { + return "pkg/sentry/fsimpl/verity.fileDescription" +} + +func (fd *fileDescription) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "d", + "isDir", + "lowerFD", + "lowerMappable", + "merkleReader", + "merkleWriter", + "parentMerkleWriter", + "off", + } +} + +func (fd *fileDescription) beforeSave() {} + +// +checklocksignore +func (fd *fileDescription) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.d) + stateSinkObject.Save(3, &fd.isDir) + stateSinkObject.Save(4, &fd.lowerFD) + stateSinkObject.Save(5, &fd.lowerMappable) + stateSinkObject.Save(6, &fd.merkleReader) + stateSinkObject.Save(7, &fd.merkleWriter) + stateSinkObject.Save(8, &fd.parentMerkleWriter) + stateSinkObject.Save(9, &fd.off) +} + +func (fd *fileDescription) afterLoad() {} + +// +checklocksignore +func (fd *fileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.d) + stateSourceObject.Load(3, &fd.isDir) + stateSourceObject.Load(4, &fd.lowerFD) + stateSourceObject.Load(5, &fd.lowerMappable) + stateSourceObject.Load(6, &fd.merkleReader) + stateSourceObject.Load(7, &fd.merkleWriter) + stateSourceObject.Load(8, &fd.parentMerkleWriter) + stateSourceObject.Load(9, &fd.off) +} + +func init() { + state.Register((*FilesystemType)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*InternalFilesystemOptions)(nil)) + state.Register((*dentry)(nil)) + state.Register((*fileDescription)(nil)) +} diff --git a/pkg/sentry/fsimpl/verity/verity_test.go b/pkg/sentry/fsimpl/verity/verity_test.go deleted file mode 100644 index af041bd50..000000000 --- a/pkg/sentry/fsimpl/verity/verity_test.go +++ /dev/null @@ -1,1211 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package verity - -import ( - "fmt" - "io" - "math/rand" - "strconv" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/usermem" -) - -const ( - // rootMerkleFilename is the name of the root Merkle tree file. - rootMerkleFilename = "root.verity" - // maxDataSize is the maximum data size of a test file. - maxDataSize = 100000 -) - -var hashAlgs = []HashAlgorithm{SHA256, SHA512} - -func dentryFromVD(t *testing.T, vd vfs.VirtualDentry) *dentry { - t.Helper() - d, ok := vd.Dentry().Impl().(*dentry) - if !ok { - t.Fatalf("can't assert %T as a *dentry", vd) - } - return d -} - -// dentryFromFD returns the dentry corresponding to fd. -func dentryFromFD(t *testing.T, fd *vfs.FileDescription) *dentry { - t.Helper() - f, ok := fd.Impl().(*fileDescription) - if !ok { - t.Fatalf("can't assert %T as a *fileDescription", fd) - } - return f.d -} - -// newVerityRoot creates a new verity mount, and returns the root. The -// underlying file system is tmpfs. If the error is not nil, then cleanup -// should be called when the root is no longer needed. -func newVerityRoot(t *testing.T, hashAlg HashAlgorithm) (*vfs.VirtualFilesystem, vfs.VirtualDentry, context.Context, error) { - t.Helper() - k, err := testutil.Boot() - if err != nil { - t.Fatalf("testutil.Boot: %v", err) - } - - ctx := k.SupervisorContext() - - rand.Seed(time.Now().UnixNano()) - vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err) - } - - vfsObj.MustRegisterFilesystemType("verity", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - - vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - - data := "root_name=" + rootMerkleFilename - mntns, err := vfsObj.NewMountNamespace(ctx, auth.CredentialsFromContext(ctx), "", "verity", &vfs.MountOptions{ - GetFilesystemOptions: vfs.GetFilesystemOptions{ - Data: data, - InternalData: InternalFilesystemOptions{ - LowerName: "tmpfs", - Alg: hashAlg, - AllowRuntimeEnable: true, - Action: ErrorOnViolation, - }, - }, - }) - if err != nil { - return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("NewMountNamespace: %v", err) - } - root := mntns.Root() - root.IncRef() - - // Use lowerRoot in the task as we modify the lower file system - // directly in many tests. - lowerRoot := root.Dentry().Impl().(*dentry).lowerVD - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - task, err := testutil.CreateTask(ctx, "name", tc, mntns, lowerRoot, lowerRoot) - if err != nil { - t.Fatalf("testutil.CreateTask: %v", err) - } - - t.Cleanup(func() { - root.DecRef(ctx) - mntns.DecRef(ctx) - }) - return vfsObj, root, task.AsyncContext(), nil -} - -// openVerityAt opens a verity file. -// -// TODO(chongc): release reference from opening the file when done. -func openVerityAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, vd vfs.VirtualDentry, path string, flags uint32, mode linux.FileMode) (*vfs.FileDescription, error) { - return vfsObj.OpenAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: vd, - Start: vd, - Path: fspath.Parse(path), - }, &vfs.OpenOptions{ - Flags: flags, - Mode: mode, - }) -} - -// openLowerAt opens the file in the underlying file system. -// -// TODO(chongc): release reference from opening the file when done. -func (d *dentry) openLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, path string, flags uint32, mode linux.FileMode) (*vfs.FileDescription, error) { - return vfsObj.OpenAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - Path: fspath.Parse(path), - }, &vfs.OpenOptions{ - Flags: flags, - Mode: mode, - }) -} - -// openLowerMerkleAt opens the Merkle file in the underlying file system. -// -// TODO(chongc): release reference from opening the file when done. -func (d *dentry) openLowerMerkleAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, flags uint32, mode linux.FileMode) (*vfs.FileDescription, error) { - return vfsObj.OpenAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: d.lowerMerkleVD, - Start: d.lowerMerkleVD, - }, &vfs.OpenOptions{ - Flags: flags, - Mode: mode, - }) -} - -// mkdirLowerAt creates a directory in the underlying file system. -func (d *dentry) mkdirLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, path string, mode linux.FileMode) error { - return vfsObj.MkdirAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - Path: fspath.Parse(path), - }, &vfs.MkdirOptions{ - Mode: mode, - }) -} - -// unlinkLowerAt deletes the file in the underlying file system. -func (d *dentry) unlinkLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, path string) error { - return vfsObj.UnlinkAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - Path: fspath.Parse(path), - }) -} - -// unlinkLowerMerkleAt deletes the Merkle file in the underlying file system. -func (d *dentry) unlinkLowerMerkleAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, path string) error { - return vfsObj.UnlinkAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - Path: fspath.Parse(merklePrefix + path), - }) -} - -// renameLowerAt renames file name to newName in the underlying file system. -func (d *dentry) renameLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, name string, newName string) error { - return vfsObj.RenameAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - Path: fspath.Parse(name), - }, &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - Path: fspath.Parse(newName), - }, &vfs.RenameOptions{}) -} - -// renameLowerMerkleAt renames Merkle file name to newName in the underlying -// file system. -func (d *dentry) renameLowerMerkleAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, name string, newName string) error { - return vfsObj.RenameAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - Path: fspath.Parse(merklePrefix + name), - }, &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - Path: fspath.Parse(merklePrefix + newName), - }, &vfs.RenameOptions{}) -} - -// symlinkLowerAt creates a symbolic link at symlink referring to the given target -// in the underlying filesystem. -func (d *dentry) symlinkLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, target, symlink string) error { - return vfsObj.SymlinkAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - Path: fspath.Parse(symlink), - }, target) -} - -// newFileFD creates a new file in the verity mount, and returns the FD. The FD -// points to a file that has random data generated. -func newFileFD(ctx context.Context, t *testing.T, vfsObj *vfs.VirtualFilesystem, root vfs.VirtualDentry, filePath string, mode linux.FileMode) (*vfs.FileDescription, int, error) { - // Create the file in the underlying file system. - lowerFD, err := dentryFromVD(t, root).openLowerAt(ctx, vfsObj, filePath, linux.O_RDWR|linux.O_CREAT|linux.O_EXCL, linux.ModeRegular|mode) - if err != nil { - return nil, 0, err - } - - // Generate random data to be written to the file. - dataSize := rand.Intn(maxDataSize) + 1 - data := make([]byte, dataSize) - rand.Read(data) - - // Write directly to the underlying FD, since verity FD is read-only. - n, err := lowerFD.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) - if err != nil { - return nil, 0, err - } - - if n != int64(len(data)) { - return nil, 0, fmt.Errorf("lowerFD.Write got write length %d, want %d", n, len(data)) - } - - lowerFD.DecRef(ctx) - - // Now open the verity file descriptor. - fd, err := openVerityAt(ctx, vfsObj, root, filePath, linux.O_RDONLY, mode) - return fd, dataSize, err -} - -// newDirFD creates a new directory in the verity mount, and returns the FD. -func newDirFD(ctx context.Context, t *testing.T, vfsObj *vfs.VirtualFilesystem, root vfs.VirtualDentry, dirPath string, mode linux.FileMode) (*vfs.FileDescription, error) { - // Create the directory in the underlying file system. - if err := dentryFromVD(t, root).mkdirLowerAt(ctx, vfsObj, dirPath, linux.ModeRegular|mode); err != nil { - return nil, err - } - if _, err := dentryFromVD(t, root).openLowerAt(ctx, vfsObj, dirPath, linux.O_RDONLY|linux.O_DIRECTORY, linux.ModeRegular|mode); err != nil { - return nil, err - } - return openVerityAt(ctx, vfsObj, root, dirPath, linux.O_RDONLY|linux.O_DIRECTORY, mode) -} - -// newEmptyFileFD creates a new empty file in the verity mount, and returns the FD. -func newEmptyFileFD(ctx context.Context, t *testing.T, vfsObj *vfs.VirtualFilesystem, root vfs.VirtualDentry, filePath string, mode linux.FileMode) (*vfs.FileDescription, error) { - // Create the file in the underlying file system. - _, err := dentryFromVD(t, root).openLowerAt(ctx, vfsObj, filePath, linux.O_RDWR|linux.O_CREAT|linux.O_EXCL, linux.ModeRegular|mode) - if err != nil { - return nil, err - } - // Now open the verity file descriptor. - fd, err := openVerityAt(ctx, vfsObj, root, filePath, linux.O_RDONLY, mode) - return fd, err -} - -// flipRandomBit randomly flips a bit in the file represented by fd. -func flipRandomBit(ctx context.Context, fd *vfs.FileDescription, size int) error { - randomPos := int64(rand.Intn(size)) - byteToModify := make([]byte, 1) - if _, err := fd.PRead(ctx, usermem.BytesIOSequence(byteToModify), randomPos, vfs.ReadOptions{}); err != nil { - return fmt.Errorf("lowerFD.PRead: %v", err) - } - byteToModify[0] ^= 1 - if _, err := fd.PWrite(ctx, usermem.BytesIOSequence(byteToModify), randomPos, vfs.WriteOptions{}); err != nil { - return fmt.Errorf("lowerFD.PWrite: %v", err) - } - return nil -} - -func enableVerity(ctx context.Context, t *testing.T, fd *vfs.FileDescription) { - t.Helper() - var args arch.SyscallArguments - args[1] = arch.SyscallArgument{Value: linux.FS_IOC_ENABLE_VERITY} - if _, err := fd.Ioctl(ctx, nil /* uio */, args); err != nil { - t.Fatalf("enable verity: %v", err) - } -} - -// TestOpen ensures that when a file is created, the corresponding Merkle tree -// file and the root Merkle tree file exist. -func TestOpen(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, _, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Ensure that the corresponding Merkle tree file is created. - if _, err = dentryFromFD(t, fd).openLowerMerkleAt(ctx, vfsObj, linux.O_RDONLY, linux.ModeRegular); err != nil { - t.Errorf("OpenAt Merkle tree file %s: %v", merklePrefix+filename, err) - } - - // Ensure the root merkle tree file is created. - if _, err = dentryFromVD(t, root).openLowerMerkleAt(ctx, vfsObj, linux.O_RDONLY, linux.ModeRegular); err != nil { - t.Errorf("OpenAt root Merkle tree file %s: %v", merklePrefix+rootMerkleFilename, err) - } - } -} - -// TestPReadUnmodifiedFileSucceeds ensures that pread from an untouched verity -// file succeeds after enabling verity for it. -func TestPReadUnmodifiedFileSucceeds(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, size, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file and confirm a normal read succeeds. - enableVerity(ctx, t, fd) - - buf := make([]byte, size) - n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), 0 /* offset */, vfs.ReadOptions{}) - if err != nil && err != io.EOF { - t.Fatalf("fd.PRead: %v", err) - } - - if n != int64(size) { - t.Errorf("fd.PRead got read length %d, want %d", n, size) - } - } -} - -// TestReadUnmodifiedFileSucceeds ensures that read from an untouched verity -// file succeeds after enabling verity for it. -func TestReadUnmodifiedFileSucceeds(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, size, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file and confirm a normal read succeeds. - enableVerity(ctx, t, fd) - - buf := make([]byte, size) - n, err := fd.Read(ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) - if err != nil && err != io.EOF { - t.Fatalf("fd.Read: %v", err) - } - - if n != int64(size) { - t.Errorf("fd.PRead got read length %d, want %d", n, size) - } - } -} - -// TestReadUnmodifiedEmptyFileSucceeds ensures that read from an untouched empty verity -// file succeeds after enabling verity for it. -func TestReadUnmodifiedEmptyFileSucceeds(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-empty-file" - fd, err := newEmptyFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newEmptyFileFD: %v", err) - } - - // Enable verity on the file and confirm a normal read succeeds. - enableVerity(ctx, t, fd) - - var buf []byte - n, err := fd.Read(ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) - if err != nil && err != io.EOF { - t.Fatalf("fd.Read: %v", err) - } - - if n != 0 { - t.Errorf("fd.Read got read length %d, expected 0", n) - } - } -} - -// TestReopenUnmodifiedFileSucceeds ensures that reopen an untouched verity file -// succeeds after enabling verity for it. -func TestReopenUnmodifiedFileSucceeds(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, _, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file and confirms a normal read succeeds. - enableVerity(ctx, t, fd) - - // Ensure reopening the verity enabled file succeeds. - if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); err != nil { - t.Errorf("reopen enabled file failed: %v", err) - } - } -} - -// TestOpenNonexistentFile ensures that opening a nonexistent file does not -// trigger verification failure, even if the parent directory is verified. -func TestOpenNonexistentFile(t *testing.T) { - vfsObj, root, ctx, err := newVerityRoot(t, SHA256) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, _, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file and confirms a normal read succeeds. - enableVerity(ctx, t, fd) - - // Enable verity on the parent directory. - parentFD, err := openVerityAt(ctx, vfsObj, root, "", linux.O_RDONLY, linux.ModeRegular) - if err != nil { - t.Fatalf("OpenAt: %v", err) - } - enableVerity(ctx, t, parentFD) - - // Ensure open an unexpected file in the parent directory fails with - // ENOENT rather than verification failure. - if _, err = openVerityAt(ctx, vfsObj, root, filename+"abc", linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.ENOENT, err) { - t.Errorf("OpenAt unexpected error: %v", err) - } -} - -// TestPReadModifiedFileFails ensures that read from a modified verity file -// fails. -func TestPReadModifiedFileFails(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, size, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file. - enableVerity(ctx, t, fd) - - // Open a new lowerFD that's read/writable. - lowerFD, err := dentryFromFD(t, fd).openLowerAt(ctx, vfsObj, "", linux.O_RDWR, linux.ModeRegular) - if err != nil { - t.Fatalf("OpenAt: %v", err) - } - - if err := flipRandomBit(ctx, lowerFD, size); err != nil { - t.Fatalf("flipRandomBit: %v", err) - } - - // Confirm that read from the modified file fails. - buf := make([]byte, size) - if _, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), 0 /* offset */, vfs.ReadOptions{}); err == nil { - t.Fatalf("fd.PRead succeeded, expected failure") - } - } -} - -// TestReadModifiedFileFails ensures that read from a modified verity file -// fails. -func TestReadModifiedFileFails(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, size, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file. - enableVerity(ctx, t, fd) - - // Open a new lowerFD that's read/writable. - lowerFD, err := dentryFromFD(t, fd).openLowerAt(ctx, vfsObj, "", linux.O_RDWR, linux.ModeRegular) - if err != nil { - t.Fatalf("OpenAt: %v", err) - } - - if err := flipRandomBit(ctx, lowerFD, size); err != nil { - t.Fatalf("flipRandomBit: %v", err) - } - - // Confirm that read from the modified file fails. - buf := make([]byte, size) - if _, err := fd.Read(ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{}); err == nil { - t.Fatalf("fd.Read succeeded, expected failure") - } - } -} - -// TestModifiedMerkleFails ensures that read from a verity file fails if the -// corresponding Merkle tree file is modified. -func TestModifiedMerkleFails(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, size, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file. - enableVerity(ctx, t, fd) - - // Open a new lowerMerkleFD that's read/writable. - lowerMerkleFD, err := dentryFromFD(t, fd).openLowerMerkleAt(ctx, vfsObj, linux.O_RDWR, linux.ModeRegular) - if err != nil { - t.Fatalf("OpenAt: %v", err) - } - - // Flip a random bit in the Merkle tree file. - stat, err := lowerMerkleFD.Stat(ctx, vfs.StatOptions{}) - if err != nil { - t.Errorf("lowerMerkleFD.Stat: %v", err) - } - - if err := flipRandomBit(ctx, lowerMerkleFD, int(stat.Size)); err != nil { - t.Fatalf("flipRandomBit: %v", err) - } - - // Confirm that read from a file with modified Merkle tree fails. - buf := make([]byte, size) - if _, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), 0 /* offset */, vfs.ReadOptions{}); err == nil { - t.Fatalf("fd.PRead succeeded with modified Merkle file") - } - } -} - -// TestModifiedParentMerkleFails ensures that open a verity enabled file in a -// verity enabled directory fails if the hashes related to the target file in -// the parent Merkle tree file is modified. -func TestModifiedParentMerkleFails(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, _, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file. - enableVerity(ctx, t, fd) - - // Enable verity on the parent directory. - parentFD, err := openVerityAt(ctx, vfsObj, root, "", linux.O_RDONLY, linux.ModeRegular) - if err != nil { - t.Fatalf("OpenAt: %v", err) - } - enableVerity(ctx, t, parentFD) - - // Open a new lowerMerkleFD that's read/writable. - parentLowerMerkleFD, err := dentryFromFD(t, fd).parent.openLowerMerkleAt(ctx, vfsObj, linux.O_RDWR, linux.ModeRegular) - if err != nil { - t.Fatalf("OpenAt: %v", err) - } - - // Flip a random bit in the parent Merkle tree file. - // This parent directory contains only one child, so any random - // modification in the parent Merkle tree should cause verification - // failure when opening the child file. - sizeString, err := parentLowerMerkleFD.GetXattr(ctx, &vfs.GetXattrOptions{ - Name: childrenOffsetXattr, - Size: sizeOfStringInt32, - }) - if err != nil { - t.Fatalf("parentLowerMerkleFD.GetXattr: %v", err) - } - parentMerkleSize, err := strconv.Atoi(sizeString) - if err != nil { - t.Fatalf("Failed convert size to int: %v", err) - } - if err := flipRandomBit(ctx, parentLowerMerkleFD, parentMerkleSize); err != nil { - t.Fatalf("flipRandomBit: %v", err) - } - - parentLowerMerkleFD.DecRef(ctx) - - // Ensure reopening the verity enabled file fails. - if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); err == nil { - t.Errorf("OpenAt file with modified parent Merkle succeeded") - } - } -} - -// TestUnmodifiedStatSucceeds ensures that stat of an untouched verity file -// succeeds after enabling verity for it. -func TestUnmodifiedStatSucceeds(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, _, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file and confirm that stat succeeds. - enableVerity(ctx, t, fd) - if _, err := fd.Stat(ctx, vfs.StatOptions{}); err != nil { - t.Errorf("fd.Stat: %v", err) - } - } -} - -// TestModifiedStatFails checks that getting stat for a file with modified stat -// should fail. -func TestModifiedStatFails(t *testing.T) { - for _, alg := range hashAlgs { - vfsObj, root, ctx, err := newVerityRoot(t, alg) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, _, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file. - enableVerity(ctx, t, fd) - - lowerFD := fd.Impl().(*fileDescription).lowerFD - // Change the stat of the underlying file, and check that stat fails. - if err := lowerFD.SetStat(ctx, vfs.SetStatOptions{ - Stat: linux.Statx{ - Mask: uint32(linux.STATX_MODE), - Mode: 0777, - }, - }); err != nil { - t.Fatalf("lowerFD.SetStat: %v", err) - } - - if _, err := fd.Stat(ctx, vfs.StatOptions{}); err == nil { - t.Errorf("fd.Stat succeeded when it should fail") - } - } -} - -// TestOpenDeletedFileFails ensures that opening a deleted verity enabled file -// and/or the corresponding Merkle tree file fails with the verity error. -func TestOpenDeletedFileFails(t *testing.T) { - testCases := []struct { - name string - // The original file is removed if changeFile is true. - changeFile bool - // The Merkle tree file is removed if changeMerkleFile is true. - changeMerkleFile bool - }{ - { - name: "FileOnly", - changeFile: true, - changeMerkleFile: false, - }, - { - name: "MerkleOnly", - changeFile: false, - changeMerkleFile: true, - }, - { - name: "FileAndMerkle", - changeFile: true, - changeMerkleFile: true, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - vfsObj, root, ctx, err := newVerityRoot(t, SHA256) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, _, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file. - enableVerity(ctx, t, fd) - - if tc.changeFile { - if err := dentryFromVD(t, root).unlinkLowerAt(ctx, vfsObj, filename); err != nil { - t.Fatalf("UnlinkAt: %v", err) - } - } - if tc.changeMerkleFile { - if err := dentryFromVD(t, root).unlinkLowerMerkleAt(ctx, vfsObj, filename); err != nil { - t.Fatalf("UnlinkAt: %v", err) - } - } - - // Ensure reopening the verity enabled file fails. - if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) { - t.Errorf("got OpenAt error: %v, expected EIO", err) - } - }) - } -} - -// TestOpenRenamedFileFails ensures that opening a renamed verity enabled file -// and/or the corresponding Merkle tree file fails with the verity error. -func TestOpenRenamedFileFails(t *testing.T) { - testCases := []struct { - name string - // The original file is renamed if changeFile is true. - changeFile bool - // The Merkle tree file is renamed if changeMerkleFile is true. - changeMerkleFile bool - }{ - { - name: "FileOnly", - changeFile: true, - changeMerkleFile: false, - }, - { - name: "MerkleOnly", - changeFile: false, - changeMerkleFile: true, - }, - { - name: "FileAndMerkle", - changeFile: true, - changeMerkleFile: true, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - vfsObj, root, ctx, err := newVerityRoot(t, SHA256) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - filename := "verity-test-file" - fd, _, err := newFileFD(ctx, t, vfsObj, root, filename, 0644) - if err != nil { - t.Fatalf("newFileFD: %v", err) - } - - // Enable verity on the file. - enableVerity(ctx, t, fd) - - newFilename := "renamed-test-file" - if tc.changeFile { - if err := dentryFromVD(t, root).renameLowerAt(ctx, vfsObj, filename, newFilename); err != nil { - t.Fatalf("RenameAt: %v", err) - } - } - if tc.changeMerkleFile { - if err := dentryFromVD(t, root).renameLowerMerkleAt(ctx, vfsObj, filename, newFilename); err != nil { - t.Fatalf("UnlinkAt: %v", err) - } - } - - // Ensure reopening the verity enabled file fails. - if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) { - t.Errorf("got OpenAt error: %v, expected EIO", err) - } - }) - } -} - -// TestUnmodifiedSymlinkFileReadSucceeds ensures that readlink() for an -// unmodified verity enabled symlink succeeds. -func TestUnmodifiedSymlinkFileReadSucceeds(t *testing.T) { - testCases := []struct { - name string - // The symlink target is a directory. - hasDirectoryTarget bool - // The symlink target is a directory and contains a regular file which will be - // used to test walking a symlink. - testWalk bool - }{ - { - name: "RegularFileTarget", - hasDirectoryTarget: false, - testWalk: false, - }, - { - name: "DirectoryTarget", - hasDirectoryTarget: true, - testWalk: false, - }, - { - name: "RegularFileInSymlinkDirectory", - hasDirectoryTarget: true, - testWalk: true, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - if tc.testWalk && !tc.hasDirectoryTarget { - t.Fatalf("Invalid test case: hasDirectoryTarget can't be false when testing symlink walk") - } - - vfsObj, root, ctx, err := newVerityRoot(t, SHA256) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - var target string - if tc.hasDirectoryTarget { - target = "verity-test-dir" - if _, err := newDirFD(ctx, t, vfsObj, root, target, 0644); err != nil { - t.Fatalf("newDirFD: %v", err) - } - } else { - target = "verity-test-file" - if _, _, err := newFileFD(ctx, t, vfsObj, root, target, 0644); err != nil { - t.Fatalf("newFileFD: %v", err) - } - } - - if tc.testWalk { - fileInTargetDirectory := target + "/" + "verity-test-file" - if _, _, err := newFileFD(ctx, t, vfsObj, root, fileInTargetDirectory, 0644); err != nil { - t.Fatalf("newFileFD: %v", err) - } - } - - symlink := "verity-test-symlink" - if err := dentryFromVD(t, root).symlinkLowerAt(ctx, vfsObj, target, symlink); err != nil { - t.Fatalf("SymlinkAt: %v", err) - } - - fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular) - - if err != nil { - t.Fatalf("openVerityAt symlink: %v", err) - } - - enableVerity(ctx, t, fd) - - if _, err := vfsObj.ReadlinkAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(symlink), - }); err != nil { - t.Fatalf("ReadlinkAt: %v", err) - } - - if tc.testWalk { - fileInSymlinkDirectory := symlink + "/verity-test-file" - // Ensure opening the verity enabled file in the symlink directory succeeds. - if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); err != nil { - t.Errorf("open enabled file failed: %v", err) - } - } - }) - } -} - -// TestDeletedSymlinkFileReadFails ensures that reading value of a deleted verity enabled -// symlink fails. -func TestDeletedSymlinkFileReadFails(t *testing.T) { - testCases := []struct { - name string - // The original symlink is unlinked if deleteLink is true. - deleteLink bool - // The Merkle tree file is renamed if deleteMerkleFile is true. - deleteMerkleFile bool - // The symlink target is a directory. - hasDirectoryTarget bool - // The symlink target is a directory and contains a regular file which will be - // used to test walking a symlink. - testWalk bool - }{ - { - name: "DeleteLinkRegularFile", - deleteLink: true, - deleteMerkleFile: false, - hasDirectoryTarget: false, - testWalk: false, - }, - { - name: "DeleteMerkleRegFile", - deleteLink: false, - deleteMerkleFile: true, - hasDirectoryTarget: false, - testWalk: false, - }, - { - name: "DeleteLinkAndMerkleRegFile", - deleteLink: true, - deleteMerkleFile: true, - hasDirectoryTarget: false, - testWalk: false, - }, - { - name: "DeleteLinkDirectory", - deleteLink: true, - deleteMerkleFile: false, - hasDirectoryTarget: true, - testWalk: false, - }, - { - name: "DeleteMerkleDirectory", - deleteLink: false, - deleteMerkleFile: true, - hasDirectoryTarget: true, - testWalk: false, - }, - { - name: "DeleteLinkAndMerkleDirectory", - deleteLink: true, - deleteMerkleFile: true, - hasDirectoryTarget: true, - testWalk: false, - }, - { - name: "DeleteLinkDirectoryWalk", - deleteLink: true, - deleteMerkleFile: false, - hasDirectoryTarget: true, - testWalk: true, - }, - { - name: "DeleteMerkleDirectoryWalk", - deleteLink: false, - deleteMerkleFile: true, - hasDirectoryTarget: true, - testWalk: true, - }, - { - name: "DeleteLinkAndMerkleDirectoryWalk", - deleteLink: true, - deleteMerkleFile: true, - hasDirectoryTarget: true, - testWalk: true, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - if tc.testWalk && !tc.hasDirectoryTarget { - t.Fatalf("Invalid test case: hasDirectoryTarget can't be false when testing symlink walk") - } - - vfsObj, root, ctx, err := newVerityRoot(t, SHA256) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - var target string - if tc.hasDirectoryTarget { - target = "verity-test-dir" - if _, err := newDirFD(ctx, t, vfsObj, root, target, 0644); err != nil { - t.Fatalf("newDirFD: %v", err) - } - } else { - target = "verity-test-file" - if _, _, err := newFileFD(ctx, t, vfsObj, root, target, 0644); err != nil { - t.Fatalf("newFileFD: %v", err) - } - } - - symlink := "verity-test-symlink" - if err := dentryFromVD(t, root).symlinkLowerAt(ctx, vfsObj, target, symlink); err != nil { - t.Fatalf("SymlinkAt: %v", err) - } - - fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular) - - if err != nil { - t.Fatalf("openVerityAt symlink: %v", err) - } - - if tc.testWalk { - fileInTargetDirectory := target + "/" + "verity-test-file" - if _, _, err := newFileFD(ctx, t, vfsObj, root, fileInTargetDirectory, 0644); err != nil { - t.Fatalf("newFileFD: %v", err) - } - } - - enableVerity(ctx, t, fd) - - if tc.deleteLink { - if err := dentryFromVD(t, root).unlinkLowerAt(ctx, vfsObj, symlink); err != nil { - t.Fatalf("UnlinkAt: %v", err) - } - } - if tc.deleteMerkleFile { - if err := dentryFromVD(t, root).unlinkLowerMerkleAt(ctx, vfsObj, symlink); err != nil { - t.Fatalf("UnlinkAt: %v", err) - } - } - if _, err := vfsObj.ReadlinkAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(symlink), - }); !linuxerr.Equals(linuxerr.EIO, err) { - t.Fatalf("ReadlinkAt succeeded with modified symlink: %v", err) - } - - if tc.testWalk { - fileInSymlinkDirectory := symlink + "/verity-test-file" - // Ensure opening the verity enabled file in the symlink directory fails. - if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) { - t.Errorf("Open succeeded with modified symlink: %v", err) - } - } - }) - } -} - -// TestModifiedSymlinkFileReadFails ensures that reading value of a modified verity enabled -// symlink fails. -func TestModifiedSymlinkFileReadFails(t *testing.T) { - testCases := []struct { - name string - // The symlink target is a directory. - hasDirectoryTarget bool - // The symlink target is a directory and contains a regular file which will be - // used to test walking a symlink. - testWalk bool - }{ - { - name: "RegularFileTarget", - hasDirectoryTarget: false, - testWalk: false, - }, - { - name: "DirectoryTarget", - hasDirectoryTarget: true, - testWalk: false, - }, - { - name: "RegularFileInSymlinkDirectory", - hasDirectoryTarget: true, - testWalk: true, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - if tc.testWalk && !tc.hasDirectoryTarget { - t.Fatalf("Invalid test case: hasDirectoryTarget can't be false when testing symlink walk") - } - - vfsObj, root, ctx, err := newVerityRoot(t, SHA256) - if err != nil { - t.Fatalf("newVerityRoot: %v", err) - } - - var target string - if tc.hasDirectoryTarget { - target = "verity-test-dir" - if _, err := newDirFD(ctx, t, vfsObj, root, target, 0644); err != nil { - t.Fatalf("newDirFD: %v", err) - } - } else { - target = "verity-test-file" - if _, _, err := newFileFD(ctx, t, vfsObj, root, target, 0644); err != nil { - t.Fatalf("newFileFD: %v", err) - } - } - - // Create symlink which points to target file. - symlink := "verity-test-symlink" - if err := dentryFromVD(t, root).symlinkLowerAt(ctx, vfsObj, target, symlink); err != nil { - t.Fatalf("SymlinkAt: %v", err) - } - - // Open symlink file to get the fd for ioctl in new step. - fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular) - if err != nil { - t.Fatalf("OpenAt symlink: %v", err) - } - - if tc.testWalk { - fileInTargetDirectory := target + "/" + "verity-test-file" - if _, _, err := newFileFD(ctx, t, vfsObj, root, fileInTargetDirectory, 0644); err != nil { - t.Fatalf("newFileFD: %v", err) - } - } - - enableVerity(ctx, t, fd) - - var newTarget string - if tc.hasDirectoryTarget { - newTarget = "verity-test-dir-new" - if _, err := newDirFD(ctx, t, vfsObj, root, newTarget, 0644); err != nil { - t.Fatalf("newDirFD: %v", err) - } - } else { - newTarget = "verity-test-file-new" - if _, _, err := newFileFD(ctx, t, vfsObj, root, newTarget, 0644); err != nil { - t.Fatalf("newFileFD: %v", err) - } - } - - // Unlink symlink->target. - if err := dentryFromVD(t, root).unlinkLowerAt(ctx, vfsObj, symlink); err != nil { - t.Fatalf("UnlinkAt: %v", err) - } - - // Link symlink->newTarget. - if err := dentryFromVD(t, root).symlinkLowerAt(ctx, vfsObj, newTarget, symlink); err != nil { - t.Fatalf("SymlinkAt: %v", err) - } - - // Freshen lower dentry for symlink. - symlinkVD, err := vfsObj.GetDentryAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(symlink), - }, &vfs.GetDentryOptions{}) - if err != nil { - t.Fatalf("Failed to get symlink dentry: %v", err) - } - symlinkDentry := dentryFromVD(t, symlinkVD) - - symlinkLowerVD, err := dentryFromVD(t, root).getLowerAt(ctx, vfsObj, symlink) - if err != nil { - t.Fatalf("Failed to get symlink lower dentry: %v", err) - } - symlinkDentry.lowerVD = symlinkLowerVD - - // Verify ReadlinkAt() fails. - if _, err := vfsObj.ReadlinkAt(ctx, auth.CredentialsFromContext(ctx), &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(symlink), - }); !linuxerr.Equals(linuxerr.EIO, err) { - t.Fatalf("ReadlinkAt succeeded with modified symlink: %v", err) - } - - if tc.testWalk { - fileInSymlinkDirectory := symlink + "/verity-test-file" - // Ensure opening the verity enabled file in the symlink directory fails. - if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) { - t.Errorf("Open succeeded with modified symlink: %v", err) - } - } - }) - } -} diff --git a/pkg/sentry/fsmetric/BUILD b/pkg/sentry/fsmetric/BUILD deleted file mode 100644 index 4e86fbdd8..000000000 --- a/pkg/sentry/fsmetric/BUILD +++ /dev/null @@ -1,10 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "fsmetric", - srcs = ["fsmetric.go"], - visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/metric"], -) diff --git a/pkg/sentry/fsmetric/fsmetric_state_autogen.go b/pkg/sentry/fsmetric/fsmetric_state_autogen.go new file mode 100644 index 000000000..61975bbb4 --- /dev/null +++ b/pkg/sentry/fsmetric/fsmetric_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package fsmetric diff --git a/pkg/sentry/hostcpu/BUILD b/pkg/sentry/hostcpu/BUILD deleted file mode 100644 index e6933aa70..000000000 --- a/pkg/sentry/hostcpu/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "hostcpu", - srcs = [ - "getcpu_amd64.s", - "getcpu_arm64.s", - "hostcpu.go", - ], - visibility = ["//:sandbox"], -) - -go_test( - name = "hostcpu_test", - size = "small", - srcs = ["hostcpu_test.go"], - library = ":hostcpu", -) diff --git a/pkg/sentry/hostcpu/hostcpu_state_autogen.go b/pkg/sentry/hostcpu/hostcpu_state_autogen.go new file mode 100644 index 000000000..97d33d8bf --- /dev/null +++ b/pkg/sentry/hostcpu/hostcpu_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package hostcpu diff --git a/pkg/sentry/hostcpu/hostcpu_test.go b/pkg/sentry/hostcpu/hostcpu_test.go deleted file mode 100644 index 7d6885c9e..000000000 --- a/pkg/sentry/hostcpu/hostcpu_test.go +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package hostcpu - -import ( - "fmt" - "testing" -) - -func TestMaxValueInLinuxBitmap(t *testing.T) { - for _, test := range []struct { - str string - max uint64 - }{ - {"0", 0}, - {"0\n", 0}, - {"0,2", 2}, - {"0-63", 63}, - {"0-3,8-11", 11}, - } { - t.Run(fmt.Sprintf("%q", test.str), func(t *testing.T) { - max, err := maxValueInLinuxBitmap(test.str) - if err != nil || max != test.max { - t.Errorf("maxValueInLinuxBitmap: got (%d, %v), wanted (%d, nil)", max, err, test.max) - } - }) - } -} - -func TestMaxValueInLinuxBitmapErrors(t *testing.T) { - for _, str := range []string{"", "\n"} { - t.Run(fmt.Sprintf("%q", str), func(t *testing.T) { - max, err := maxValueInLinuxBitmap(str) - if err == nil { - t.Errorf("maxValueInLinuxBitmap: got (%d, nil), wanted (_, error)", max) - } - t.Log(err) - }) - } -} diff --git a/pkg/sentry/hostfd/BUILD b/pkg/sentry/hostfd/BUILD deleted file mode 100644 index db3b0d0a0..000000000 --- a/pkg/sentry/hostfd/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "hostfd", - srcs = [ - "hostfd.go", - "hostfd_linux.go", - "hostfd_unsafe.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/log", - "//pkg/safemem", - "//pkg/sync", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/hostfd/hostfd_linux_state_autogen.go b/pkg/sentry/hostfd/hostfd_linux_state_autogen.go new file mode 100644 index 000000000..24e7ba400 --- /dev/null +++ b/pkg/sentry/hostfd/hostfd_linux_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build go1.1 +// +build go1.1 + +package hostfd diff --git a/pkg/sentry/hostfd/hostfd_state_autogen.go b/pkg/sentry/hostfd/hostfd_state_autogen.go new file mode 100644 index 000000000..9033424d5 --- /dev/null +++ b/pkg/sentry/hostfd/hostfd_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package hostfd diff --git a/pkg/sentry/hostfd/hostfd_unsafe_state_autogen.go b/pkg/sentry/hostfd/hostfd_unsafe_state_autogen.go new file mode 100644 index 000000000..9033424d5 --- /dev/null +++ b/pkg/sentry/hostfd/hostfd_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package hostfd diff --git a/pkg/sentry/hostmm/BUILD b/pkg/sentry/hostmm/BUILD deleted file mode 100644 index 66fa1ad40..000000000 --- a/pkg/sentry/hostmm/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "hostmm", - srcs = [ - "cgroup.go", - "hostmm.go", - "membarrier.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/fd", - "//pkg/hostarch", - "//pkg/log", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/hostmm/hostmm_state_autogen.go b/pkg/sentry/hostmm/hostmm_state_autogen.go new file mode 100644 index 000000000..925c56e14 --- /dev/null +++ b/pkg/sentry/hostmm/hostmm_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package hostmm diff --git a/pkg/sentry/inet/BUILD b/pkg/sentry/inet/BUILD deleted file mode 100644 index 5bba9de0b..000000000 --- a/pkg/sentry/inet/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package( - default_visibility = ["//:sandbox"], - licenses = ["notice"], -) - -go_library( - name = "inet", - srcs = [ - "context.go", - "inet.go", - "namespace.go", - "test_stack.go", - ], - deps = [ - "//pkg/context", - "//pkg/tcpip", - "//pkg/tcpip/stack", - ], -) diff --git a/pkg/sentry/inet/inet_state_autogen.go b/pkg/sentry/inet/inet_state_autogen.go new file mode 100644 index 000000000..30af4fb91 --- /dev/null +++ b/pkg/sentry/inet/inet_state_autogen.go @@ -0,0 +1,70 @@ +// automatically generated by stateify. + +package inet + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (t *TCPBufferSize) StateTypeName() string { + return "pkg/sentry/inet.TCPBufferSize" +} + +func (t *TCPBufferSize) StateFields() []string { + return []string{ + "Min", + "Default", + "Max", + } +} + +func (t *TCPBufferSize) beforeSave() {} + +// +checklocksignore +func (t *TCPBufferSize) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.Min) + stateSinkObject.Save(1, &t.Default) + stateSinkObject.Save(2, &t.Max) +} + +func (t *TCPBufferSize) afterLoad() {} + +// +checklocksignore +func (t *TCPBufferSize) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.Min) + stateSourceObject.Load(1, &t.Default) + stateSourceObject.Load(2, &t.Max) +} + +func (n *Namespace) StateTypeName() string { + return "pkg/sentry/inet.Namespace" +} + +func (n *Namespace) StateFields() []string { + return []string{ + "creator", + "isRoot", + } +} + +func (n *Namespace) beforeSave() {} + +// +checklocksignore +func (n *Namespace) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.creator) + stateSinkObject.Save(1, &n.isRoot) +} + +// +checklocksignore +func (n *Namespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &n.creator) + stateSourceObject.Load(1, &n.isRoot) + stateSourceObject.AfterLoad(n.afterLoad) +} + +func init() { + state.Register((*TCPBufferSize)(nil)) + state.Register((*Namespace)(nil)) +} diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD deleted file mode 100644 index c613f4932..000000000 --- a/pkg/sentry/kernel/BUILD +++ /dev/null @@ -1,318 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test", "proto_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "pending_signals_list", - out = "pending_signals_list.go", - package = "kernel", - prefix = "pendingSignal", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*pendingSignal", - "Linker": "*pendingSignal", - }, -) - -go_template_instance( - name = "process_group_list", - out = "process_group_list.go", - package = "kernel", - prefix = "processGroup", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*ProcessGroup", - "Linker": "*ProcessGroup", - }, -) - -go_template_instance( - name = "seqatomic_taskgoroutineschedinfo", - out = "seqatomic_taskgoroutineschedinfo_unsafe.go", - package = "kernel", - suffix = "TaskGoroutineSchedInfo", - template = "//pkg/sync/seqatomic:generic_seqatomic", - types = { - "Value": "TaskGoroutineSchedInfo", - }, -) - -go_template_instance( - name = "session_list", - out = "session_list.go", - package = "kernel", - prefix = "session", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Session", - "Linker": "*Session", - }, -) - -go_template_instance( - name = "task_list", - out = "task_list.go", - package = "kernel", - prefix = "task", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Task", - "Linker": "*Task", - }, -) - -go_template_instance( - name = "socket_list", - out = "socket_list.go", - package = "kernel", - prefix = "socket", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*SocketRecordVFS1", - "Linker": "*SocketRecordVFS1", - }, -) - -go_template_instance( - name = "fd_table_refs", - out = "fd_table_refs.go", - package = "kernel", - prefix = "FDTable", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "FDTable", - }, -) - -go_template_instance( - name = "fs_context_refs", - out = "fs_context_refs.go", - package = "kernel", - prefix = "FSContext", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "FSContext", - }, -) - -go_template_instance( - name = "ipc_namespace_refs", - out = "ipc_namespace_refs.go", - package = "kernel", - prefix = "IPCNamespace", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "IPCNamespace", - }, -) - -go_template_instance( - name = "process_group_refs", - out = "process_group_refs.go", - package = "kernel", - prefix = "ProcessGroup", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "ProcessGroup", - }, -) - -go_template_instance( - name = "session_refs", - out = "session_refs.go", - package = "kernel", - prefix = "Session", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "Session", - }, -) - -proto_library( - name = "uncaught_signal", - srcs = ["uncaught_signal.proto"], - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_proto"], -) - -go_library( - name = "kernel", - srcs = [ - "abstract_socket_namespace.go", - "aio.go", - "cgroup.go", - "context.go", - "fd_table.go", - "fd_table_refs.go", - "fd_table_unsafe.go", - "fs_context.go", - "fs_context_refs.go", - "ipc_namespace.go", - "ipc_namespace_refs.go", - "kcov.go", - "kcov_unsafe.go", - "kernel.go", - "kernel_opts.go", - "kernel_state.go", - "pending_signals.go", - "pending_signals_list.go", - "pending_signals_state.go", - "posixtimer.go", - "process_group_list.go", - "process_group_refs.go", - "ptrace.go", - "ptrace_amd64.go", - "ptrace_arm64.go", - "rseq.go", - "seccomp.go", - "seqatomic_taskgoroutineschedinfo_unsafe.go", - "session_list.go", - "session_refs.go", - "sessions.go", - "signal.go", - "signal_handlers.go", - "socket_list.go", - "syscalls.go", - "syscalls_state.go", - "syslog.go", - "task.go", - "task_acct.go", - "task_block.go", - "task_cgroup.go", - "task_clone.go", - "task_context.go", - "task_exec.go", - "task_exit.go", - "task_futex.go", - "task_identity.go", - "task_image.go", - "task_list.go", - "task_log.go", - "task_net.go", - "task_run.go", - "task_sched.go", - "task_signals.go", - "task_start.go", - "task_stop.go", - "task_syscall.go", - "task_usermem.go", - "task_work.go", - "thread_group.go", - "threads.go", - "timekeeper.go", - "timekeeper_state.go", - "tty.go", - "uts_namespace.go", - "vdso.go", - "version.go", - ], - imports = [ - "gvisor.dev/gvisor/pkg/bpf", - "gvisor.dev/gvisor/pkg/sentry/device", - "gvisor.dev/gvisor/pkg/tcpip", - ], - marshal = True, - visibility = ["//:sandbox"], - deps = [ - ":uncaught_signal_go_proto", - "//pkg/abi", - "//pkg/abi/linux", - "//pkg/abi/linux/errno", - "//pkg/amutex", - "//pkg/bits", - "//pkg/bpf", - "//pkg/cleanup", - "//pkg/context", - "//pkg/coverage", - "//pkg/cpuid", - "//pkg/errors", - "//pkg/errors/linuxerr", - "//pkg/eventchannel", - "//pkg/fspath", - "//pkg/goid", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/metric", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/secio", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fs/timerfd", - "//pkg/sentry/fsbridge", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/fsimpl/pipefs", - "//pkg/sentry/fsimpl/sockfs", - "//pkg/sentry/fsimpl/timerfd", - "//pkg/sentry/fsimpl/tmpfs", - "//pkg/sentry/hostcpu", - "//pkg/sentry/inet", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/epoll", - "//pkg/sentry/kernel/futex", - "//pkg/sentry/kernel/msgqueue", - "//pkg/sentry/kernel/sched", - "//pkg/sentry/kernel/semaphore", - "//pkg/sentry/kernel/shm", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/loader", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/socket/netlink/port", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/time", - "//pkg/sentry/unimpl", - "//pkg/sentry/unimpl:unimplemented_syscall_go_proto", - "//pkg/sentry/uniqueid", - "//pkg/sentry/usage", - "//pkg/sentry/vfs", - "//pkg/state", - "//pkg/state/statefile", - "//pkg/state/wire", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/tcpip/stack", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "kernel_test", - size = "small", - srcs = [ - "fd_table_test.go", - "table_test.go", - "task_test.go", - "timekeeper_test.go", - ], - library = ":kernel", - deps = [ - "//pkg/abi", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sentry/arch", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/filetest", - "//pkg/sentry/kernel/sched", - "//pkg/sentry/limits", - "//pkg/sentry/pgalloc", - "//pkg/sentry/time", - "//pkg/sentry/usage", - "//pkg/sync", - ], -) diff --git a/pkg/sentry/kernel/README.md b/pkg/sentry/kernel/README.md deleted file mode 100644 index 427311be8..000000000 --- a/pkg/sentry/kernel/README.md +++ /dev/null @@ -1,108 +0,0 @@ -This package contains: - -- A (partial) emulation of the "core Linux kernel", which governs task - execution and scheduling, system call dispatch, and signal handling. See - below for details. - -- The top-level interface for the sentry's Linux kernel emulation in general, - used by the `main` function of all versions of the sentry. This interface - revolves around the `Env` type (defined in `kernel.go`). - -# Background - -In Linux, each schedulable context is referred to interchangeably as a "task" or -"thread". Tasks can be divided into userspace and kernel tasks. In the sentry, -scheduling is managed by the Go runtime, so each schedulable context is a -goroutine; only "userspace" (application) contexts are referred to as tasks, and -represented by Task objects. (From this point forward, "task" refers to the -sentry's notion of a task unless otherwise specified.) - -At a high level, Linux application threads can be thought of as repeating a "run -loop": - -- Some amount of application code is executed in userspace. - -- A trap (explicit syscall invocation, hardware interrupt or exception, etc.) - causes control flow to switch to the kernel. - -- Some amount of kernel code is executed in kernelspace, e.g. to handle the - cause of the trap. - -- The kernel "returns from the trap" into application code. - -Analogously, each task in the sentry is associated with a *task goroutine* that -executes that task's run loop (`Task.run` in `task_run.go`). However, the -sentry's task run loop differs in structure in order to support saving execution -state to, and resuming execution from, checkpoints. - -While in kernelspace, a Linux thread can be descheduled (cease execution) in a -variety of ways: - -- It can yield or be preempted, becoming temporarily descheduled but still - runnable. At present, the sentry delegates scheduling of runnable threads to - the Go runtime. - -- It can exit, becoming permanently descheduled. The sentry's equivalent is - returning from `Task.run`, terminating the task goroutine. - -- It can enter interruptible sleep, a state in which it can be woken by a - caller-defined wakeup or the receipt of a signal. In the sentry, - interruptible sleep (which is ambiguously referred to as *blocking*) is - implemented by making all events that can end blocking (including signal - notifications) communicated via Go channels and using `select` to multiplex - wakeup sources; see `task_block.go`. - -- It can enter uninterruptible sleep, a state in which it can only be woken by - a caller-defined wakeup. Killable sleep is a closely related variant in - which the task can also be woken by SIGKILL. (These definitions also include - Linux's "group-stopped" (`TASK_STOPPED`) and "ptrace-stopped" - (`TASK_TRACED`) states.) - -To maximize compatibility with Linux, sentry checkpointing appears as a spurious -signal-delivery interrupt on all tasks; interrupted system calls return `EINTR` -or are automatically restarted as usual. However, these semantics require that -uninterruptible and killable sleeps do not appear to be interrupted. In other -words, the state of the task, including its progress through the interrupted -operation, must be preserved by checkpointing. For many such sleeps, the wakeup -condition is application-controlled, making it infeasible to wait for the sleep -to end before checkpointing. Instead, we must support checkpointing progress -through sleeping operations. - -# Implementation - -We break the task's control flow graph into *states*, delimited by: - -1. Points where uninterruptible and killable sleeps may occur. For example, - there exists a state boundary between signal dequeueing and signal delivery - because there may be an intervening ptrace signal-delivery-stop. - -2. Points where sleep-induced branches may "rejoin" normal execution. For - example, the syscall exit state exists because it can be reached immediately - following a synchronous syscall, or after a task that is sleeping in - `execve()` or `vfork()` resumes execution. - -3. Points containing large branches. This is strictly for organizational - purposes. For example, the state that processes interrupt-signaled - conditions is kept separate from the main "app" state to reduce the size of - the latter. - -4. `SyscallReinvoke`, which does not correspond to anything in Linux, and - exists solely to serve the autosave feature. - -![dot -Tpng -Goverlap=false -orun_states.png run_states.dot](g3doc/run_states.png "Task control flow graph") - -States before which a stop may occur are represented as implementations of the -`taskRunState` interface named `run(state)`, allowing them to be saved and -restored. States that cannot be immediately preceded by a stop are simply `Task` -methods named `do(state)`. - -Conditions that can require task goroutines to cease execution for unknown -lengths of time are called *stops*. Stops are divided into *internal stops*, -which are stops whose start and end conditions are implemented within the -sentry, and *external stops*, which are stops whose start and end conditions are -not known to the sentry. Hence all uninterruptible and killable sleeps are -internal stops, and the existence of a pending checkpoint operation is an -external stop. Internal stops are reified into instances of the `TaskStop` type, -while external stops are merely counted. The task run loop alternates between -checking for stops and advancing the task's state. This allows checkpointing to -hold tasks in a stopped state while waiting for all tasks in the system to stop. diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD deleted file mode 100644 index 7a1a36454..000000000 --- a/pkg/sentry/kernel/auth/BUILD +++ /dev/null @@ -1,71 +0,0 @@ -load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "atomicptr_credentials", - out = "atomicptr_credentials_unsafe.go", - package = "auth", - suffix = "Credentials", - template = "//pkg/sync/atomicptr:generic_atomicptr", - types = { - "Value": "Credentials", - }, -) - -go_template_instance( - name = "id_map_range", - out = "id_map_range.go", - package = "auth", - prefix = "idMap", - template = "//pkg/segment:generic_range", - types = { - "T": "uint32", - }, -) - -go_template_instance( - name = "id_map_set", - out = "id_map_set.go", - consts = { - "minDegree": "3", - }, - package = "auth", - prefix = "idMap", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint32", - "Range": "idMapRange", - "Value": "uint32", - "Functions": "idMapFunctions", - }, -) - -go_library( - name = "auth", - srcs = [ - "atomicptr_credentials_unsafe.go", - "auth.go", - "capability_set.go", - "context.go", - "credentials.go", - "id.go", - "id_map.go", - "id_map_functions.go", - "id_map_range.go", - "id_map_set.go", - "user_namespace.go", - ], - marshal = True, - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/bits", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/log", - "//pkg/sync", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go b/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go new file mode 100644 index 000000000..4535c958f --- /dev/null +++ b/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go @@ -0,0 +1,37 @@ +package auth + +import ( + "sync/atomic" + "unsafe" +) + +// An AtomicPtr is a pointer to a value of type Value that can be atomically +// loaded and stored. The zero value of an AtomicPtr represents nil. +// +// Note that copying AtomicPtr by value performs a non-atomic read of the +// stored pointer, which is unsafe if Store() can be called concurrently; in +// this case, do `dst.Store(src.Load())` instead. +// +// +stateify savable +type AtomicPtrCredentials struct { + ptr unsafe.Pointer `state:".(*Credentials)"` +} + +func (p *AtomicPtrCredentials) savePtr() *Credentials { + return p.Load() +} + +func (p *AtomicPtrCredentials) loadPtr(v *Credentials) { + p.Store(v) +} + +// Load returns the value set by the most recent Store. It returns nil if there +// has been no previous call to Store. +func (p *AtomicPtrCredentials) Load() *Credentials { + return (*Credentials)(atomic.LoadPointer(&p.ptr)) +} + +// Store sets the value returned by Load to x. +func (p *AtomicPtrCredentials) Store(x *Credentials) { + atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) +} diff --git a/pkg/sentry/kernel/auth/auth_abi_autogen_unsafe.go b/pkg/sentry/kernel/auth/auth_abi_autogen_unsafe.go new file mode 100644 index 000000000..393dcea8c --- /dev/null +++ b/pkg/sentry/kernel/auth/auth_abi_autogen_unsafe.go @@ -0,0 +1,274 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package auth + +import ( + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal" + "io" + "reflect" + "runtime" + "unsafe" +) + +// Marshallable types used by this file. +var _ marshal.Marshallable = (*GID)(nil) +var _ marshal.Marshallable = (*UID)(nil) + +// SizeBytes implements marshal.Marshallable.SizeBytes. +//go:nosplit +func (gid *GID) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (gid *GID) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(*gid)) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (gid *GID) UnmarshalBytes(src []byte) { + *gid = GID(uint32(hostarch.ByteOrder.Uint32(src[:4]))) +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (gid *GID) Packed() bool { + // Scalar newtypes are always packed. + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (gid *GID) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(gid), uintptr(gid.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (gid *GID) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(gid), unsafe.Pointer(&src[0]), uintptr(gid.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (gid *GID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid))) + hdr.Len = gid.SizeBytes() + hdr.Cap = gid.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that gid + // must live until the use above. + runtime.KeepAlive(gid) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (gid *GID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return gid.CopyOutN(cc, addr, gid.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (gid *GID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid))) + hdr.Len = gid.SizeBytes() + hdr.Cap = gid.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that gid + // must live until the use above. + runtime.KeepAlive(gid) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (gid *GID) WriteTo(w io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid))) + hdr.Len = gid.SizeBytes() + hdr.Cap = gid.SizeBytes() + + length, err := w.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that gid + // must live until the use above. + runtime.KeepAlive(gid) // escapes: replaced by intrinsic. + return int64(length), err +} + +// CopyGIDSliceIn copies in a slice of GID objects from the task's memory. +//go:nosplit +func CopyGIDSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []GID) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*GID)(nil).SizeBytes() + + ptr := unsafe.Pointer(&dst) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that dst + // must live until the use above. + runtime.KeepAlive(dst) // escapes: replaced by intrinsic. + return length, err +} + +// CopyGIDSliceOut copies a slice of GID objects to the task's memory. +//go:nosplit +func CopyGIDSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []GID) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*GID)(nil).SizeBytes() + + ptr := unsafe.Pointer(&src) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyOutBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that src + // must live until the use above. + runtime.KeepAlive(src) // escapes: replaced by intrinsic. + return length, err +} + +// MarshalUnsafeGIDSlice is like GID.MarshalUnsafe, but for a []GID. +func MarshalUnsafeGIDSlice(src []GID, dst []byte) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*GID)(nil).SizeBytes() + + dst = dst[:size*count] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(dst))) + return size*count, nil +} + +// UnmarshalUnsafeGIDSlice is like GID.UnmarshalUnsafe, but for a []GID. +func UnmarshalUnsafeGIDSlice(dst []GID, src []byte) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*GID)(nil).SizeBytes() + + src = src[:(size*count)] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src))) + return size*count, nil +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +//go:nosplit +func (uid *UID) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (uid *UID) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(*uid)) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (uid *UID) UnmarshalBytes(src []byte) { + *uid = UID(uint32(hostarch.ByteOrder.Uint32(src[:4]))) +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (uid *UID) Packed() bool { + // Scalar newtypes are always packed. + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (uid *UID) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(uid), uintptr(uid.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (uid *UID) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(uid), unsafe.Pointer(&src[0]), uintptr(uid.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (uid *UID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid))) + hdr.Len = uid.SizeBytes() + hdr.Cap = uid.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that uid + // must live until the use above. + runtime.KeepAlive(uid) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (uid *UID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return uid.CopyOutN(cc, addr, uid.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (uid *UID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid))) + hdr.Len = uid.SizeBytes() + hdr.Cap = uid.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that uid + // must live until the use above. + runtime.KeepAlive(uid) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (uid *UID) WriteTo(w io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid))) + hdr.Len = uid.SizeBytes() + hdr.Cap = uid.SizeBytes() + + length, err := w.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that uid + // must live until the use above. + runtime.KeepAlive(uid) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/sentry/kernel/auth/auth_state_autogen.go b/pkg/sentry/kernel/auth/auth_state_autogen.go new file mode 100644 index 000000000..5a7f55933 --- /dev/null +++ b/pkg/sentry/kernel/auth/auth_state_autogen.go @@ -0,0 +1,280 @@ +// automatically generated by stateify. + +package auth + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (c *Credentials) StateTypeName() string { + return "pkg/sentry/kernel/auth.Credentials" +} + +func (c *Credentials) StateFields() []string { + return []string{ + "RealKUID", + "EffectiveKUID", + "SavedKUID", + "RealKGID", + "EffectiveKGID", + "SavedKGID", + "ExtraKGIDs", + "PermittedCaps", + "InheritableCaps", + "EffectiveCaps", + "BoundingCaps", + "KeepCaps", + "UserNamespace", + } +} + +func (c *Credentials) beforeSave() {} + +// +checklocksignore +func (c *Credentials) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.RealKUID) + stateSinkObject.Save(1, &c.EffectiveKUID) + stateSinkObject.Save(2, &c.SavedKUID) + stateSinkObject.Save(3, &c.RealKGID) + stateSinkObject.Save(4, &c.EffectiveKGID) + stateSinkObject.Save(5, &c.SavedKGID) + stateSinkObject.Save(6, &c.ExtraKGIDs) + stateSinkObject.Save(7, &c.PermittedCaps) + stateSinkObject.Save(8, &c.InheritableCaps) + stateSinkObject.Save(9, &c.EffectiveCaps) + stateSinkObject.Save(10, &c.BoundingCaps) + stateSinkObject.Save(11, &c.KeepCaps) + stateSinkObject.Save(12, &c.UserNamespace) +} + +func (c *Credentials) afterLoad() {} + +// +checklocksignore +func (c *Credentials) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.RealKUID) + stateSourceObject.Load(1, &c.EffectiveKUID) + stateSourceObject.Load(2, &c.SavedKUID) + stateSourceObject.Load(3, &c.RealKGID) + stateSourceObject.Load(4, &c.EffectiveKGID) + stateSourceObject.Load(5, &c.SavedKGID) + stateSourceObject.Load(6, &c.ExtraKGIDs) + stateSourceObject.Load(7, &c.PermittedCaps) + stateSourceObject.Load(8, &c.InheritableCaps) + stateSourceObject.Load(9, &c.EffectiveCaps) + stateSourceObject.Load(10, &c.BoundingCaps) + stateSourceObject.Load(11, &c.KeepCaps) + stateSourceObject.Load(12, &c.UserNamespace) +} + +func (i *IDMapEntry) StateTypeName() string { + return "pkg/sentry/kernel/auth.IDMapEntry" +} + +func (i *IDMapEntry) StateFields() []string { + return []string{ + "FirstID", + "FirstParentID", + "Length", + } +} + +func (i *IDMapEntry) beforeSave() {} + +// +checklocksignore +func (i *IDMapEntry) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.FirstID) + stateSinkObject.Save(1, &i.FirstParentID) + stateSinkObject.Save(2, &i.Length) +} + +func (i *IDMapEntry) afterLoad() {} + +// +checklocksignore +func (i *IDMapEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.FirstID) + stateSourceObject.Load(1, &i.FirstParentID) + stateSourceObject.Load(2, &i.Length) +} + +func (r *idMapRange) StateTypeName() string { + return "pkg/sentry/kernel/auth.idMapRange" +} + +func (r *idMapRange) StateFields() []string { + return []string{ + "Start", + "End", + } +} + +func (r *idMapRange) beforeSave() {} + +// +checklocksignore +func (r *idMapRange) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.Start) + stateSinkObject.Save(1, &r.End) +} + +func (r *idMapRange) afterLoad() {} + +// +checklocksignore +func (r *idMapRange) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.Start) + stateSourceObject.Load(1, &r.End) +} + +func (s *idMapSet) StateTypeName() string { + return "pkg/sentry/kernel/auth.idMapSet" +} + +func (s *idMapSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *idMapSet) beforeSave() {} + +// +checklocksignore +func (s *idMapSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *idMapSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *idMapSet) afterLoad() {} + +// +checklocksignore +func (s *idMapSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*idMapSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*idMapSegmentDataSlices)) }) +} + +func (n *idMapnode) StateTypeName() string { + return "pkg/sentry/kernel/auth.idMapnode" +} + +func (n *idMapnode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *idMapnode) beforeSave() {} + +// +checklocksignore +func (n *idMapnode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *idMapnode) afterLoad() {} + +// +checklocksignore +func (n *idMapnode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (i *idMapSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/kernel/auth.idMapSegmentDataSlices" +} + +func (i *idMapSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (i *idMapSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (i *idMapSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.Start) + stateSinkObject.Save(1, &i.End) + stateSinkObject.Save(2, &i.Values) +} + +func (i *idMapSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (i *idMapSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.Start) + stateSourceObject.Load(1, &i.End) + stateSourceObject.Load(2, &i.Values) +} + +func (ns *UserNamespace) StateTypeName() string { + return "pkg/sentry/kernel/auth.UserNamespace" +} + +func (ns *UserNamespace) StateFields() []string { + return []string{ + "parent", + "owner", + "uidMapFromParent", + "uidMapToParent", + "gidMapFromParent", + "gidMapToParent", + } +} + +func (ns *UserNamespace) beforeSave() {} + +// +checklocksignore +func (ns *UserNamespace) StateSave(stateSinkObject state.Sink) { + ns.beforeSave() + stateSinkObject.Save(0, &ns.parent) + stateSinkObject.Save(1, &ns.owner) + stateSinkObject.Save(2, &ns.uidMapFromParent) + stateSinkObject.Save(3, &ns.uidMapToParent) + stateSinkObject.Save(4, &ns.gidMapFromParent) + stateSinkObject.Save(5, &ns.gidMapToParent) +} + +func (ns *UserNamespace) afterLoad() {} + +// +checklocksignore +func (ns *UserNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ns.parent) + stateSourceObject.Load(1, &ns.owner) + stateSourceObject.Load(2, &ns.uidMapFromParent) + stateSourceObject.Load(3, &ns.uidMapToParent) + stateSourceObject.Load(4, &ns.gidMapFromParent) + stateSourceObject.Load(5, &ns.gidMapToParent) +} + +func init() { + state.Register((*Credentials)(nil)) + state.Register((*IDMapEntry)(nil)) + state.Register((*idMapRange)(nil)) + state.Register((*idMapSet)(nil)) + state.Register((*idMapnode)(nil)) + state.Register((*idMapSegmentDataSlices)(nil)) + state.Register((*UserNamespace)(nil)) +} diff --git a/pkg/sentry/kernel/auth/auth_unsafe_abi_autogen_unsafe.go b/pkg/sentry/kernel/auth/auth_unsafe_abi_autogen_unsafe.go new file mode 100644 index 000000000..ebcd3911b --- /dev/null +++ b/pkg/sentry/kernel/auth/auth_unsafe_abi_autogen_unsafe.go @@ -0,0 +1,7 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package auth + +import ( +) + diff --git a/pkg/sentry/kernel/auth/auth_unsafe_state_autogen.go b/pkg/sentry/kernel/auth/auth_unsafe_state_autogen.go new file mode 100644 index 000000000..61f7e4ce9 --- /dev/null +++ b/pkg/sentry/kernel/auth/auth_unsafe_state_autogen.go @@ -0,0 +1,37 @@ +// automatically generated by stateify. + +package auth + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (p *AtomicPtrCredentials) StateTypeName() string { + return "pkg/sentry/kernel/auth.AtomicPtrCredentials" +} + +func (p *AtomicPtrCredentials) StateFields() []string { + return []string{ + "ptr", + } +} + +func (p *AtomicPtrCredentials) beforeSave() {} + +// +checklocksignore +func (p *AtomicPtrCredentials) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + var ptrValue *Credentials = p.savePtr() + stateSinkObject.SaveValue(0, ptrValue) +} + +func (p *AtomicPtrCredentials) afterLoad() {} + +// +checklocksignore +func (p *AtomicPtrCredentials) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*Credentials), func(y interface{}) { p.loadPtr(y.(*Credentials)) }) +} + +func init() { + state.Register((*AtomicPtrCredentials)(nil)) +} diff --git a/pkg/sentry/kernel/auth/id_map_range.go b/pkg/sentry/kernel/auth/id_map_range.go new file mode 100644 index 000000000..19d542716 --- /dev/null +++ b/pkg/sentry/kernel/auth/id_map_range.go @@ -0,0 +1,76 @@ +package auth + +// A Range represents a contiguous range of T. +// +// +stateify savable +type idMapRange struct { + // Start is the inclusive start of the range. + Start uint32 + + // End is the exclusive end of the range. + End uint32 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +// +//go:nosplit +func (r idMapRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +// +//go:nosplit +func (r idMapRange) Length() uint32 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +// +//go:nosplit +func (r idMapRange) Contains(x uint32) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +// +//go:nosplit +func (r idMapRange) Overlaps(r2 idMapRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +// +//go:nosplit +func (r idMapRange) IsSupersetOf(r2 idMapRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +// +//go:nosplit +func (r idMapRange) Intersect(r2 idMapRange) idMapRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +// +//go:nosplit +func (r idMapRange) CanSplitAt(x uint32) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/kernel/auth/id_map_set.go b/pkg/sentry/kernel/auth/id_map_set.go new file mode 100644 index 000000000..479753981 --- /dev/null +++ b/pkg/sentry/kernel/auth/id_map_set.go @@ -0,0 +1,1643 @@ +package auth + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const idMaptrackGaps = 0 + +var _ = uint8(idMaptrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type idMapdynamicGap [idMaptrackGaps]uint32 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *idMapdynamicGap) Get() uint32 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *idMapdynamicGap) Set(v uint32) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + idMapminDegree = 3 + + idMapmaxDegree = 2 * idMapminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type idMapSet struct { + root idMapnode `state:".(*idMapSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *idMapSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *idMapSet) IsEmptyRange(r idMapRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *idMapSet) Span() uint32 { + var sz uint32 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *idMapSet) SpanRange(r idMapRange) uint32 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint32 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *idMapSet) FirstSegment() idMapIterator { + if s.root.nrSegments == 0 { + return idMapIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *idMapSet) LastSegment() idMapIterator { + if s.root.nrSegments == 0 { + return idMapIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *idMapSet) FirstGap() idMapGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return idMapGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *idMapSet) LastGap() idMapGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return idMapGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *idMapSet) Find(key uint32) (idMapIterator, idMapGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return idMapIterator{n, i}, idMapGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return idMapIterator{}, idMapGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *idMapSet) FindSegment(key uint32) idMapIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *idMapSet) LowerBoundSegment(min uint32) idMapIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *idMapSet) UpperBoundSegment(max uint32) idMapIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *idMapSet) FindGap(key uint32) idMapGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *idMapSet) LowerBoundGap(min uint32) idMapGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *idMapSet) UpperBoundGap(max uint32) idMapGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *idMapSet) Add(r idMapRange, val uint32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *idMapSet) AddWithoutMerging(r idMapRange, val uint32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *idMapSet) Insert(gap idMapGapIterator, r idMapRange, val uint32) idMapIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (idMapFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := idMaptrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (idMapFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (idMapFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := idMaptrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *idMapSet) InsertWithoutMerging(gap idMapGapIterator, r idMapRange, val uint32) idMapIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *idMapSet) InsertWithoutMergingUnchecked(gap idMapGapIterator, r idMapRange, val uint32) idMapIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := idMaptrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return idMapIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *idMapSet) Remove(seg idMapIterator) idMapGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if idMaptrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + idMapFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if idMaptrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(idMapGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *idMapSet) RemoveAll() { + s.root = idMapnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *idMapSet) RemoveRange(r idMapRange) idMapGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *idMapSet) Merge(first, second idMapIterator) idMapIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *idMapSet) MergeUnchecked(first, second idMapIterator) idMapIterator { + if first.End() == second.Start() { + if mval, ok := (idMapFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return idMapIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *idMapSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *idMapSet) MergeRange(r idMapRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *idMapSet) MergeAdjacent(r idMapRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *idMapSet) Split(seg idMapIterator, split uint32) (idMapIterator, idMapIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *idMapSet) SplitUnchecked(seg idMapIterator, split uint32) (idMapIterator, idMapIterator) { + val1, val2 := (idMapFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), idMapRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *idMapSet) SplitAt(split uint32) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *idMapSet) Isolate(seg idMapIterator, r idMapRange) idMapIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *idMapSet) ApplyContiguous(r idMapRange, fn func(seg idMapIterator)) idMapGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return idMapGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return idMapGapIterator{} + } + } +} + +// +stateify savable +type idMapnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *idMapnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap idMapdynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [idMapmaxDegree - 1]idMapRange + values [idMapmaxDegree - 1]uint32 + children [idMapmaxDegree]*idMapnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *idMapnode) firstSegment() idMapIterator { + for n.hasChildren { + n = n.children[0] + } + return idMapIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *idMapnode) lastSegment() idMapIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return idMapIterator{n, n.nrSegments - 1} +} + +func (n *idMapnode) prevSibling() *idMapnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *idMapnode) nextSibling() *idMapnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *idMapnode) rebalanceBeforeInsert(gap idMapGapIterator) idMapGapIterator { + if n.nrSegments < idMapmaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &idMapnode{ + nrSegments: idMapminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &idMapnode{ + nrSegments: idMapminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:idMapminDegree-1], n.keys[:idMapminDegree-1]) + copy(left.values[:idMapminDegree-1], n.values[:idMapminDegree-1]) + copy(right.keys[:idMapminDegree-1], n.keys[idMapminDegree:]) + copy(right.values[:idMapminDegree-1], n.values[idMapminDegree:]) + n.keys[0], n.values[0] = n.keys[idMapminDegree-1], n.values[idMapminDegree-1] + idMapzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:idMapminDegree], n.children[:idMapminDegree]) + copy(right.children[:idMapminDegree], n.children[idMapminDegree:]) + idMapzeroNodeSlice(n.children[2:]) + for i := 0; i < idMapminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if idMaptrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < idMapminDegree { + return idMapGapIterator{left, gap.index} + } + return idMapGapIterator{right, gap.index - idMapminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[idMapminDegree-1], n.values[idMapminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &idMapnode{ + nrSegments: idMapminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:idMapminDegree-1], n.keys[idMapminDegree:]) + copy(sibling.values[:idMapminDegree-1], n.values[idMapminDegree:]) + idMapzeroValueSlice(n.values[idMapminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:idMapminDegree], n.children[idMapminDegree:]) + idMapzeroNodeSlice(n.children[idMapminDegree:]) + for i := 0; i < idMapminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = idMapminDegree - 1 + + if idMaptrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < idMapminDegree { + return gap + } + return idMapGapIterator{sibling, gap.index - idMapminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *idMapnode) rebalanceAfterRemove(gap idMapGapIterator) idMapGapIterator { + for { + if n.nrSegments >= idMapminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= idMapminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + idMapFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if idMaptrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return idMapGapIterator{n, 0} + } + if gap.node == n { + return idMapGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= idMapminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + idMapFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if idMaptrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return idMapGapIterator{n, n.nrSegments} + } + return idMapGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return idMapGapIterator{p, gap.index} + } + if gap.node == right { + return idMapGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *idMapnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = idMapGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + idMapFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if idMaptrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *idMapnode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *idMapnode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *idMapnode) calculateMaxGapLeaf() uint32 { + max := idMapGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (idMapGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *idMapnode) calculateMaxGapInternal() uint32 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *idMapnode) searchFirstLargeEnoughGap(minSize uint32) idMapGapIterator { + if n.maxGap.Get() < minSize { + return idMapGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := idMapGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *idMapnode) searchLastLargeEnoughGap(minSize uint32) idMapGapIterator { + if n.maxGap.Get() < minSize { + return idMapGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := idMapGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type idMapIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *idMapnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg idMapIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg idMapIterator) Range() idMapRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg idMapIterator) Start() uint32 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg idMapIterator) End() uint32 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg idMapIterator) SetRangeUnchecked(r idMapRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg idMapIterator) SetRange(r idMapRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg idMapIterator) SetStartUnchecked(start uint32) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg idMapIterator) SetStart(start uint32) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg idMapIterator) SetEndUnchecked(end uint32) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg idMapIterator) SetEnd(end uint32) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg idMapIterator) Value() uint32 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg idMapIterator) ValuePtr() *uint32 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg idMapIterator) SetValue(val uint32) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg idMapIterator) PrevSegment() idMapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return idMapIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return idMapIterator{} + } + return idMapsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg idMapIterator) NextSegment() idMapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return idMapIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return idMapIterator{} + } + return idMapsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg idMapIterator) PrevGap() idMapGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return idMapGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg idMapIterator) NextGap() idMapGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return idMapGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg idMapIterator) PrevNonEmpty() (idMapIterator, idMapGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return idMapIterator{}, gap + } + return gap.PrevSegment(), idMapGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg idMapIterator) NextNonEmpty() (idMapIterator, idMapGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return idMapIterator{}, gap + } + return gap.NextSegment(), idMapGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type idMapGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *idMapnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap idMapGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap idMapGapIterator) Range() idMapRange { + return idMapRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap idMapGapIterator) Start() uint32 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return idMapFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap idMapGapIterator) End() uint32 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return idMapFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap idMapGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap idMapGapIterator) PrevSegment() idMapIterator { + return idMapsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap idMapGapIterator) NextSegment() idMapIterator { + return idMapsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap idMapGapIterator) PrevGap() idMapGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return idMapGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap idMapGapIterator) NextGap() idMapGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return idMapGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap idMapGapIterator) NextLargeEnoughGap(minSize uint32) idMapGapIterator { + if idMaptrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap idMapGapIterator) nextLargeEnoughGapHelper(minSize uint32) idMapGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return idMapGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap idMapGapIterator) PrevLargeEnoughGap(minSize uint32) idMapGapIterator { + if idMaptrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap idMapGapIterator) prevLargeEnoughGapHelper(minSize uint32) idMapGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return idMapGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func idMapsegmentBeforePosition(n *idMapnode, i int) idMapIterator { + for i == 0 { + if n.parent == nil { + return idMapIterator{} + } + n, i = n.parent, n.parentIndex + } + return idMapIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func idMapsegmentAfterPosition(n *idMapnode, i int) idMapIterator { + for i == n.nrSegments { + if n.parent == nil { + return idMapIterator{} + } + n, i = n.parent, n.parentIndex + } + return idMapIterator{n, i} +} + +func idMapzeroValueSlice(slice []uint32) { + + for i := range slice { + idMapFunctions{}.ClearValue(&slice[i]) + } +} + +func idMapzeroNodeSlice(slice []*idMapnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *idMapSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *idMapnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *idMapnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if idMaptrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type idMapSegmentDataSlices struct { + Start []uint32 + End []uint32 + Values []uint32 +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *idMapSet) ExportSortedSlices() *idMapSegmentDataSlices { + var sds idMapSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *idMapSet) ImportSortedSlices(sds *idMapSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := idMapRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *idMapSet) segmentTestCheck(expectedSegments int, segFunc func(int, idMapRange, uint32) error) error { + havePrev := false + prev := uint32(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *idMapSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *idMapSet) saveRoot() *idMapSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *idMapSet) loadRoot(sds *idMapSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/kernel/contexttest/BUILD b/pkg/sentry/kernel/contexttest/BUILD deleted file mode 100644 index 9d26392c0..000000000 --- a/pkg/sentry/kernel/contexttest/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "contexttest", - testonly = 1, - srcs = ["contexttest.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/sentry/contexttest", - "//pkg/sentry/kernel", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - ], -) diff --git a/pkg/sentry/kernel/contexttest/contexttest.go b/pkg/sentry/kernel/contexttest/contexttest.go deleted file mode 100644 index 22c340e56..000000000 --- a/pkg/sentry/kernel/contexttest/contexttest.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package contexttest provides a test context.Context which includes -// a dummy kernel pointing to a valid platform. -package contexttest - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" -) - -// Context returns a Context that may be used in tests. Uses ptrace as the -// platform.Platform, and provides a stub kernel that only serves to point to -// the platform. -func Context(tb testing.TB) context.Context { - ctx := contexttest.Context(tb) - k := &kernel.Kernel{ - Platform: platform.FromContext(ctx), - } - k.SetMemoryFile(pgalloc.MemoryFileFromContext(ctx)) - ctx.(*contexttest.TestContext).RegisterValue(kernel.CtxKernel, k) - return ctx -} diff --git a/pkg/sentry/kernel/epoll/BUILD b/pkg/sentry/kernel/epoll/BUILD deleted file mode 100644 index 723a85f64..000000000 --- a/pkg/sentry/kernel/epoll/BUILD +++ /dev/null @@ -1,52 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "epoll_list", - out = "epoll_list.go", - package = "epoll", - prefix = "pollEntry", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*pollEntry", - "Linker": "*pollEntry", - }, -) - -go_library( - name = "epoll", - srcs = [ - "epoll.go", - "epoll_list.go", - "epoll_state.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/refs", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sync", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "epoll_test", - size = "small", - srcs = [ - "epoll_test.go", - ], - library = ":epoll", - deps = [ - "//pkg/sentry/contexttest", - "//pkg/sentry/fs/filetest", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/epoll/epoll_list.go b/pkg/sentry/kernel/epoll/epoll_list.go new file mode 100644 index 000000000..b6abe2de9 --- /dev/null +++ b/pkg/sentry/kernel/epoll/epoll_list.go @@ -0,0 +1,221 @@ +package epoll + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type pollEntryElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (pollEntryElementMapper) linkerFor(elem *pollEntry) *pollEntry { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type pollEntryList struct { + head *pollEntry + tail *pollEntry +} + +// Reset resets list l to the empty state. +func (l *pollEntryList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *pollEntryList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *pollEntryList) Front() *pollEntry { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *pollEntryList) Back() *pollEntry { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *pollEntryList) Len() (count int) { + for e := l.Front(); e != nil; e = (pollEntryElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *pollEntryList) PushFront(e *pollEntry) { + linker := pollEntryElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + pollEntryElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *pollEntryList) PushBack(e *pollEntry) { + linker := pollEntryElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + pollEntryElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *pollEntryList) PushBackList(m *pollEntryList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + pollEntryElementMapper{}.linkerFor(l.tail).SetNext(m.head) + pollEntryElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *pollEntryList) InsertAfter(b, e *pollEntry) { + bLinker := pollEntryElementMapper{}.linkerFor(b) + eLinker := pollEntryElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + pollEntryElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *pollEntryList) InsertBefore(a, e *pollEntry) { + aLinker := pollEntryElementMapper{}.linkerFor(a) + eLinker := pollEntryElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + pollEntryElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *pollEntryList) Remove(e *pollEntry) { + linker := pollEntryElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + pollEntryElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + pollEntryElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type pollEntryEntry struct { + next *pollEntry + prev *pollEntry +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *pollEntryEntry) Next() *pollEntry { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *pollEntryEntry) Prev() *pollEntry { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *pollEntryEntry) SetNext(elem *pollEntry) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *pollEntryEntry) SetPrev(elem *pollEntry) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/epoll/epoll_state_autogen.go b/pkg/sentry/kernel/epoll/epoll_state_autogen.go new file mode 100644 index 000000000..44bd520ac --- /dev/null +++ b/pkg/sentry/kernel/epoll/epoll_state_autogen.go @@ -0,0 +1,192 @@ +// automatically generated by stateify. + +package epoll + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (f *FileIdentifier) StateTypeName() string { + return "pkg/sentry/kernel/epoll.FileIdentifier" +} + +func (f *FileIdentifier) StateFields() []string { + return []string{ + "File", + "Fd", + } +} + +func (f *FileIdentifier) beforeSave() {} + +// +checklocksignore +func (f *FileIdentifier) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.File) + stateSinkObject.Save(1, &f.Fd) +} + +func (f *FileIdentifier) afterLoad() {} + +// +checklocksignore +func (f *FileIdentifier) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &f.File) + stateSourceObject.Load(1, &f.Fd) +} + +func (p *pollEntry) StateTypeName() string { + return "pkg/sentry/kernel/epoll.pollEntry" +} + +func (p *pollEntry) StateFields() []string { + return []string{ + "pollEntryEntry", + "id", + "userData", + "mask", + "flags", + "epoll", + } +} + +func (p *pollEntry) beforeSave() {} + +// +checklocksignore +func (p *pollEntry) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.pollEntryEntry) + stateSinkObject.Save(1, &p.id) + stateSinkObject.Save(2, &p.userData) + stateSinkObject.Save(3, &p.mask) + stateSinkObject.Save(4, &p.flags) + stateSinkObject.Save(5, &p.epoll) +} + +// +checklocksignore +func (p *pollEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.pollEntryEntry) + stateSourceObject.LoadWait(1, &p.id) + stateSourceObject.Load(2, &p.userData) + stateSourceObject.Load(3, &p.mask) + stateSourceObject.Load(4, &p.flags) + stateSourceObject.Load(5, &p.epoll) + stateSourceObject.AfterLoad(p.afterLoad) +} + +func (e *EventPoll) StateTypeName() string { + return "pkg/sentry/kernel/epoll.EventPoll" +} + +func (e *EventPoll) StateFields() []string { + return []string{ + "files", + "readyList", + "waitingList", + "disabledList", + } +} + +func (e *EventPoll) beforeSave() {} + +// +checklocksignore +func (e *EventPoll) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + if !state.IsZeroValue(&e.FilePipeSeek) { + state.Failf("FilePipeSeek is %#v, expected zero", &e.FilePipeSeek) + } + if !state.IsZeroValue(&e.FileNotDirReaddir) { + state.Failf("FileNotDirReaddir is %#v, expected zero", &e.FileNotDirReaddir) + } + if !state.IsZeroValue(&e.FileNoFsync) { + state.Failf("FileNoFsync is %#v, expected zero", &e.FileNoFsync) + } + if !state.IsZeroValue(&e.FileNoopFlush) { + state.Failf("FileNoopFlush is %#v, expected zero", &e.FileNoopFlush) + } + if !state.IsZeroValue(&e.FileNoIoctl) { + state.Failf("FileNoIoctl is %#v, expected zero", &e.FileNoIoctl) + } + if !state.IsZeroValue(&e.FileNoMMap) { + state.Failf("FileNoMMap is %#v, expected zero", &e.FileNoMMap) + } + if !state.IsZeroValue(&e.Queue) { + state.Failf("Queue is %#v, expected zero", &e.Queue) + } + stateSinkObject.Save(0, &e.files) + stateSinkObject.Save(1, &e.readyList) + stateSinkObject.Save(2, &e.waitingList) + stateSinkObject.Save(3, &e.disabledList) +} + +// +checklocksignore +func (e *EventPoll) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.files) + stateSourceObject.Load(1, &e.readyList) + stateSourceObject.Load(2, &e.waitingList) + stateSourceObject.Load(3, &e.disabledList) + stateSourceObject.AfterLoad(e.afterLoad) +} + +func (l *pollEntryList) StateTypeName() string { + return "pkg/sentry/kernel/epoll.pollEntryList" +} + +func (l *pollEntryList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *pollEntryList) beforeSave() {} + +// +checklocksignore +func (l *pollEntryList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *pollEntryList) afterLoad() {} + +// +checklocksignore +func (l *pollEntryList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *pollEntryEntry) StateTypeName() string { + return "pkg/sentry/kernel/epoll.pollEntryEntry" +} + +func (e *pollEntryEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *pollEntryEntry) beforeSave() {} + +// +checklocksignore +func (e *pollEntryEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *pollEntryEntry) afterLoad() {} + +// +checklocksignore +func (e *pollEntryEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func init() { + state.Register((*FileIdentifier)(nil)) + state.Register((*pollEntry)(nil)) + state.Register((*EventPoll)(nil)) + state.Register((*pollEntryList)(nil)) + state.Register((*pollEntryEntry)(nil)) +} diff --git a/pkg/sentry/kernel/epoll/epoll_test.go b/pkg/sentry/kernel/epoll/epoll_test.go deleted file mode 100644 index 8ef6cb3e7..000000000 --- a/pkg/sentry/kernel/epoll/epoll_test.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package epoll - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs/filetest" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestFileDestroyed(t *testing.T) { - f := filetest.NewTestFile(t) - id := FileIdentifier{f, 12} - - ctx := contexttest.Context(t) - efile := NewEventPoll(ctx) - e := efile.FileOperations.(*EventPoll) - if err := e.AddEntry(id, 0, waiter.ReadableEvents, [2]int32{}); err != nil { - t.Fatalf("addEntry failed: %v", err) - } - - // Check that we get an event reported twice in a row. - evt := e.ReadEvents(1) - if len(evt) != 1 { - t.Fatalf("Unexpected number of ready events: want %v, got %v", 1, len(evt)) - } - - evt = e.ReadEvents(1) - if len(evt) != 1 { - t.Fatalf("Unexpected number of ready events: want %v, got %v", 1, len(evt)) - } - - // Destroy the file. Check that we get no more events. - f.DecRef(ctx) - - evt = e.ReadEvents(1) - if len(evt) != 0 { - t.Fatalf("Unexpected number of ready events: want %v, got %v", 0, len(evt)) - } - -} diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD deleted file mode 100644 index 564c3d42e..000000000 --- a/pkg/sentry/kernel/eventfd/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "eventfd", - srcs = ["eventfd.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/fdnotifier", - "//pkg/hostarch", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "eventfd_test", - size = "small", - srcs = ["eventfd_test.go"], - library = ":eventfd", - deps = [ - "//pkg/sentry/contexttest", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go b/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go new file mode 100644 index 000000000..596920c42 --- /dev/null +++ b/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go @@ -0,0 +1,45 @@ +// automatically generated by stateify. + +package eventfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (e *EventOperations) StateTypeName() string { + return "pkg/sentry/kernel/eventfd.EventOperations" +} + +func (e *EventOperations) StateFields() []string { + return []string{ + "val", + "semMode", + "hostfd", + } +} + +func (e *EventOperations) beforeSave() {} + +// +checklocksignore +func (e *EventOperations) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + if !state.IsZeroValue(&e.wq) { + state.Failf("wq is %#v, expected zero", &e.wq) + } + stateSinkObject.Save(0, &e.val) + stateSinkObject.Save(1, &e.semMode) + stateSinkObject.Save(2, &e.hostfd) +} + +func (e *EventOperations) afterLoad() {} + +// +checklocksignore +func (e *EventOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.val) + stateSourceObject.Load(1, &e.semMode) + stateSourceObject.Load(2, &e.hostfd) +} + +func init() { + state.Register((*EventOperations)(nil)) +} diff --git a/pkg/sentry/kernel/eventfd/eventfd_test.go b/pkg/sentry/kernel/eventfd/eventfd_test.go deleted file mode 100644 index 1b9e60b3a..000000000 --- a/pkg/sentry/kernel/eventfd/eventfd_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package eventfd - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestEventfd(t *testing.T) { - initVals := []uint64{ - 0, - // Using a non-zero initial value verifies that writing to an - // eventfd signals when the eventfd's counter was already - // non-zero. - 343, - } - - for _, initVal := range initVals { - ctx := contexttest.Context(t) - - // Make a new event that is writable. - event := New(ctx, initVal, false) - - // Register a callback for a write event. - w, ch := waiter.NewChannelEntry(nil) - event.EventRegister(&w, waiter.ReadableEvents) - defer event.EventUnregister(&w) - - data := []byte("00000124") - // Create and submit a write request. - n, err := event.Writev(ctx, usermem.BytesIOSequence(data)) - if err != nil { - t.Fatal(err) - } - if n != 8 { - t.Errorf("eventfd.write wrote %d bytes, not full int64", n) - } - - // Check if the callback fired due to the write event. - select { - case <-ch: - default: - t.Errorf("Didn't get notified of EventIn after write") - } - } -} - -func TestEventfdStat(t *testing.T) { - ctx := contexttest.Context(t) - - // Make a new event that is writable. - event := New(ctx, 0, false) - - // Create and submit an stat request. - uattr, err := event.Dirent.Inode.UnstableAttr(ctx) - if err != nil { - t.Fatalf("eventfd stat request failed: %v", err) - } - if uattr.Size != 0 { - t.Fatal("EventFD size should be 0") - } -} diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD deleted file mode 100644 index 6b2dd09da..000000000 --- a/pkg/sentry/kernel/fasync/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "fasync", - srcs = ["fasync.go"], - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/errors/linuxerr", - "//pkg/sentry/fs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/fasync/fasync_state_autogen.go b/pkg/sentry/kernel/fasync/fasync_state_autogen.go new file mode 100644 index 000000000..8ec069b92 --- /dev/null +++ b/pkg/sentry/kernel/fasync/fasync_state_autogen.go @@ -0,0 +1,57 @@ +// automatically generated by stateify. + +package fasync + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (a *FileAsync) StateTypeName() string { + return "pkg/sentry/kernel/fasync.FileAsync" +} + +func (a *FileAsync) StateFields() []string { + return []string{ + "e", + "fd", + "requester", + "registered", + "signal", + "recipientPG", + "recipientTG", + "recipientT", + } +} + +func (a *FileAsync) beforeSave() {} + +// +checklocksignore +func (a *FileAsync) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.e) + stateSinkObject.Save(1, &a.fd) + stateSinkObject.Save(2, &a.requester) + stateSinkObject.Save(3, &a.registered) + stateSinkObject.Save(4, &a.signal) + stateSinkObject.Save(5, &a.recipientPG) + stateSinkObject.Save(6, &a.recipientTG) + stateSinkObject.Save(7, &a.recipientT) +} + +func (a *FileAsync) afterLoad() {} + +// +checklocksignore +func (a *FileAsync) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.e) + stateSourceObject.Load(1, &a.fd) + stateSourceObject.Load(2, &a.requester) + stateSourceObject.Load(3, &a.registered) + stateSourceObject.Load(4, &a.signal) + stateSourceObject.Load(5, &a.recipientPG) + stateSourceObject.Load(6, &a.recipientTG) + stateSourceObject.Load(7, &a.recipientT) +} + +func init() { + state.Register((*FileAsync)(nil)) +} diff --git a/pkg/sentry/kernel/fd_table_refs.go b/pkg/sentry/kernel/fd_table_refs.go new file mode 100644 index 000000000..9f3bcc07a --- /dev/null +++ b/pkg/sentry/kernel/fd_table_refs.go @@ -0,0 +1,140 @@ +package kernel + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const FDTableenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var FDTableobj *FDTable + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type FDTableRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *FDTableRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *FDTableRefs) RefType() string { + return fmt.Sprintf("%T", FDTableobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *FDTableRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *FDTableRefs) LogRefs() bool { + return FDTableenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *FDTableRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *FDTableRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if FDTableenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *FDTableRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if FDTableenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *FDTableRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if FDTableenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *FDTableRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go deleted file mode 100644 index bf5460083..000000000 --- a/pkg/sentry/kernel/fd_table_test.go +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "runtime" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/filetest" - "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sync" -) - -const ( - // maxFD is the maximum FD to try to create in the map. - // - // This number of open files has been seen in the wild. - maxFD = 2 * 1024 -) - -func runTest(t testing.TB, fn func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet)) { - t.Helper() // Don't show in stacks. - - // Create the limits and context. - limitSet := limits.NewLimitSet() - limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true) - ctx := contexttest.WithLimitSet(contexttest.Context(t), limitSet) - - // Create a test file.; - file := filetest.NewTestFile(t) - - // Create the table. - fdTable := new(FDTable) - fdTable.init() - - // Run the test. - fn(ctx, fdTable, file, limitSet) -} - -// TestFDTableMany allocates maxFD FDs, i.e. maxes out the FDTable, until there -// is no room, then makes sure that NewFDAt works and also that if we remove -// one and add one that works too. -func TestFDTableMany(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - for i := 0; i < maxFD; i++ { - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Allocated %v FDs but wanted to allocate %v", i, maxFD) - } - } - - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil { - t.Fatalf("fdTable.NewFDs(0, r) in full map: got nil, wanted error") - } - - if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil { - t.Fatalf("fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) - } - - i := int32(2) - fdTable.Remove(ctx, i) - if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil || fds[0] != i { - t.Fatalf("Allocated %v FDs but wanted to allocate %v: %v", i, maxFD, err) - } - }) -} - -func TestFDTableOverLimit(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - if _, err := fdTable.NewFDs(ctx, maxFD, []*fs.File{file}, FDFlags{}); err == nil { - t.Fatalf("fdTable.NewFDs(maxFD, f): got nil, wanted error") - } - - if _, err := fdTable.NewFDs(ctx, maxFD-2, []*fs.File{file, file, file}, FDFlags{}); err == nil { - t.Fatalf("fdTable.NewFDs(maxFD-2, {f,f,f}): got nil, wanted error") - } - - if fds, err := fdTable.NewFDs(ctx, maxFD-3, []*fs.File{file, file, file}, FDFlags{}); err != nil { - t.Fatalf("fdTable.NewFDs(maxFD-3, {f,f,f}): got %v, wanted nil", err) - } else { - for _, fd := range fds { - fdTable.Remove(ctx, fd) - } - } - - if fds, err := fdTable.NewFDs(ctx, maxFD-1, []*fs.File{file}, FDFlags{}); err != nil || fds[0] != maxFD-1 { - t.Fatalf("fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) - } - - if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Adding an FD to a resized map: got %v, want nil", err) - } else if len(fds) != 1 || fds[0] != 0 { - t.Fatalf("Added an FD to a resized map: got %v, want {1}", fds) - } - }) -} - -// TestFDTable does a set of simple tests to make sure simple adds, removes, -// GetRefs, and DecRefs work. The ordering is just weird enough that a -// table-driven approach seemed clumsy. -func TestFDTable(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet) { - // Cap the limit at one. - limitSet.Set(limits.NumberOfFiles, limits.Limit{1, maxFD}, true) - - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Adding an FD to an empty 1-size map: got %v, want nil", err) - } - - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil { - t.Fatalf("Adding an FD to a filled 1-size map: got nil, wanted an error") - } - - // Remove the previous limit. - limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true) - - if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Adding an FD to a resized map: got %v, want nil", err) - } else if len(fds) != 1 || fds[0] != 1 { - t.Fatalf("Added an FD to a resized map: got %v, want {1}", fds) - } - - if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil { - t.Fatalf("Replacing FD 1 via fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) - } - - if err := fdTable.NewFDAt(ctx, maxFD+1, file, FDFlags{}); err == nil { - t.Fatalf("Using an FD that was too large via fdTable.NewFDAt(%v, r, FDFlags{}): got nil, wanted an error", maxFD+1) - } - - if ref, _ := fdTable.Get(1); ref == nil { - t.Fatalf("fdTable.Get(1): got nil, wanted %v", file) - } - - if ref, _ := fdTable.Get(2); ref != nil { - t.Fatalf("fdTable.Get(2): got a %v, wanted nil", ref) - } - - ref, _ := fdTable.Remove(ctx, 1) - if ref == nil { - t.Fatalf("fdTable.Remove(1) for an existing FD: failed, want success") - } - ref.DecRef(ctx) - - if ref, _ := fdTable.Remove(ctx, 1); ref != nil { - t.Fatalf("r.Remove(1) for a removed FD: got success, want failure") - } - }) -} - -func TestDescriptorFlags(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - if err := fdTable.NewFDAt(ctx, 2, file, FDFlags{CloseOnExec: true}); err != nil { - t.Fatalf("fdTable.NewFDAt(2, r, FDFlags{}): got %v, wanted nil", err) - } - - newFile, flags := fdTable.Get(2) - if newFile == nil { - t.Fatalf("fdTable.Get(2): got a %v, wanted nil", newFile) - } - - if !flags.CloseOnExec { - t.Fatalf("new File flags %v don't match original %d\n", flags, 0) - } - }) -} - -func BenchmarkFDLookupAndDecRef(b *testing.B) { - b.StopTimer() // Setup. - - runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file, file, file, file, file}, FDFlags{}) - if err != nil { - b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err) - } - - b.StartTimer() // Benchmark. - for i := 0; i < b.N; i++ { - tf, _ := fdTable.Get(fds[i%len(fds)]) - tf.DecRef(ctx) - } - }) -} - -func BenchmarkFDLookupAndDecRefConcurrent(b *testing.B) { - b.StopTimer() // Setup. - - runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file, file, file, file, file}, FDFlags{}) - if err != nil { - b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err) - } - - concurrency := runtime.GOMAXPROCS(0) - if concurrency < 4 { - concurrency = 4 - } - each := b.N / concurrency - - b.StartTimer() // Benchmark. - var wg sync.WaitGroup - for i := 0; i < concurrency; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for i := 0; i < each; i++ { - tf, _ := fdTable.Get(fds[i%len(fds)]) - tf.DecRef(ctx) - } - }() - } - wg.Wait() - }) -} diff --git a/pkg/sentry/kernel/fs_context_refs.go b/pkg/sentry/kernel/fs_context_refs.go new file mode 100644 index 000000000..8cbe70d1f --- /dev/null +++ b/pkg/sentry/kernel/fs_context_refs.go @@ -0,0 +1,140 @@ +package kernel + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const FSContextenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var FSContextobj *FSContext + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type FSContextRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *FSContextRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *FSContextRefs) RefType() string { + return fmt.Sprintf("%T", FSContextobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *FSContextRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *FSContextRefs) LogRefs() bool { + return FSContextenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *FSContextRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *FSContextRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if FSContextenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *FSContextRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if FSContextenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *FSContextRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if FSContextenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *FSContextRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD deleted file mode 100644 index cfdea5cf7..000000000 --- a/pkg/sentry/kernel/futex/BUILD +++ /dev/null @@ -1,61 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "atomicptr_bucket", - out = "atomicptr_bucket_unsafe.go", - package = "futex", - suffix = "Bucket", - template = "//pkg/sync/atomicptr:generic_atomicptr", - types = { - "Value": "bucket", - }, -) - -go_template_instance( - name = "waiter_list", - out = "waiter_list.go", - package = "futex", - prefix = "waiter", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Waiter", - "Linker": "*Waiter", - }, -) - -go_library( - name = "futex", - srcs = [ - "atomicptr_bucket_unsafe.go", - "futex.go", - "waiter_list.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/sentry/memmap", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - ], -) - -go_test( - name = "futex_test", - size = "small", - srcs = ["futex_test.go"], - library = ":futex", - deps = [ - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sync", - ], -) diff --git a/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go b/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go new file mode 100644 index 000000000..d3fdf09b0 --- /dev/null +++ b/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go @@ -0,0 +1,37 @@ +package futex + +import ( + "sync/atomic" + "unsafe" +) + +// An AtomicPtr is a pointer to a value of type Value that can be atomically +// loaded and stored. The zero value of an AtomicPtr represents nil. +// +// Note that copying AtomicPtr by value performs a non-atomic read of the +// stored pointer, which is unsafe if Store() can be called concurrently; in +// this case, do `dst.Store(src.Load())` instead. +// +// +stateify savable +type AtomicPtrBucket struct { + ptr unsafe.Pointer `state:".(*bucket)"` +} + +func (p *AtomicPtrBucket) savePtr() *bucket { + return p.Load() +} + +func (p *AtomicPtrBucket) loadPtr(v *bucket) { + p.Store(v) +} + +// Load returns the value set by the most recent Store. It returns nil if there +// has been no previous call to Store. +func (p *AtomicPtrBucket) Load() *bucket { + return (*bucket)(atomic.LoadPointer(&p.ptr)) +} + +// Store sets the value returned by Load to x. +func (p *AtomicPtrBucket) Store(x *bucket) { + atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) +} diff --git a/pkg/sentry/kernel/futex/futex_state_autogen.go b/pkg/sentry/kernel/futex/futex_state_autogen.go new file mode 100644 index 000000000..6e22b313a --- /dev/null +++ b/pkg/sentry/kernel/futex/futex_state_autogen.go @@ -0,0 +1,122 @@ +// automatically generated by stateify. + +package futex + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (b *bucket) StateTypeName() string { + return "pkg/sentry/kernel/futex.bucket" +} + +func (b *bucket) StateFields() []string { + return []string{} +} + +func (b *bucket) beforeSave() {} + +// +checklocksignore +func (b *bucket) StateSave(stateSinkObject state.Sink) { + b.beforeSave() + if !state.IsZeroValue(&b.waiters) { + state.Failf("waiters is %#v, expected zero", &b.waiters) + } +} + +func (b *bucket) afterLoad() {} + +// +checklocksignore +func (b *bucket) StateLoad(stateSourceObject state.Source) { +} + +func (m *Manager) StateTypeName() string { + return "pkg/sentry/kernel/futex.Manager" +} + +func (m *Manager) StateFields() []string { + return []string{ + "sharedBucket", + } +} + +func (m *Manager) beforeSave() {} + +// +checklocksignore +func (m *Manager) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + if !state.IsZeroValue(&m.privateBuckets) { + state.Failf("privateBuckets is %#v, expected zero", &m.privateBuckets) + } + stateSinkObject.Save(0, &m.sharedBucket) +} + +func (m *Manager) afterLoad() {} + +// +checklocksignore +func (m *Manager) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.sharedBucket) +} + +func (l *waiterList) StateTypeName() string { + return "pkg/sentry/kernel/futex.waiterList" +} + +func (l *waiterList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *waiterList) beforeSave() {} + +// +checklocksignore +func (l *waiterList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *waiterList) afterLoad() {} + +// +checklocksignore +func (l *waiterList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *waiterEntry) StateTypeName() string { + return "pkg/sentry/kernel/futex.waiterEntry" +} + +func (e *waiterEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *waiterEntry) beforeSave() {} + +// +checklocksignore +func (e *waiterEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *waiterEntry) afterLoad() {} + +// +checklocksignore +func (e *waiterEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func init() { + state.Register((*bucket)(nil)) + state.Register((*Manager)(nil)) + state.Register((*waiterList)(nil)) + state.Register((*waiterEntry)(nil)) +} diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go deleted file mode 100644 index 04c136f87..000000000 --- a/pkg/sentry/kernel/futex/futex_test.go +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package futex - -import ( - "math" - "runtime" - "sync/atomic" - "testing" - "unsafe" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/sync" -) - -// testData implements the Target interface, and allows us to -// treat the address passed for futex operations as an index in -// a byte slice for testing simplicity. -type testData struct { - context.Context - data []byte -} - -const sizeofInt32 = 4 - -func newTestData(size uint) testData { - return testData{ - data: make([]byte, size), - } -} - -func (t testData) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) { - val := atomic.SwapUint32((*uint32)(unsafe.Pointer(&t.data[addr])), new) - return val, nil -} - -func (t testData) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) { - if atomic.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&t.data[addr])), old, new) { - return old, nil - } - return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t.data[addr]))), nil -} - -func (t testData) LoadUint32(addr hostarch.Addr) (uint32, error) { - return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t.data[addr]))), nil -} - -func (t testData) GetSharedKey(addr hostarch.Addr) (Key, error) { - return Key{ - Kind: KindSharedMappable, - Offset: uint64(addr), - }, nil -} - -func futexKind(private bool) string { - if private { - return "private" - } - return "shared" -} - -func newPreparedTestWaiter(t *testing.T, m *Manager, ta Target, addr hostarch.Addr, private bool, val uint32, bitmask uint32) *Waiter { - w := NewWaiter() - if err := m.WaitPrepare(w, ta, addr, private, val, bitmask); err != nil { - t.Fatalf("WaitPrepare failed: %v", err) - } - return w -} - -func TestFutexWake(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(sizeofInt32) - - // Start waiting for wakeup. - w := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w, d) - - // Perform a wakeup. - if n, err := m.Wake(d, 0, private, ^uint32(0), 1); err != nil || n != 1 { - t.Errorf("Wake: got (%d, %v), wanted (1, nil)", n, err) - } - - // Expect the waiter to have been woken. - if !w.woken() { - t.Error("waiter not woken") - } - }) - } -} - -func TestFutexWakeBitmask(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(sizeofInt32) - - // Start waiting for wakeup. - w := newPreparedTestWaiter(t, m, d, 0, private, 0, 0x0000ffff) - defer m.WaitComplete(w, d) - - // Perform a wakeup using the wrong bitmask. - if n, err := m.Wake(d, 0, private, 0xffff0000, 1); err != nil || n != 0 { - t.Errorf("Wake with non-matching bitmask: got (%d, %v), wanted (0, nil)", n, err) - } - - // Expect the waiter to still be waiting. - if w.woken() { - t.Error("waiter woken unexpectedly") - } - - // Perform a wakeup using the right bitmask. - if n, err := m.Wake(d, 0, private, 0x00000001, 1); err != nil || n != 1 { - t.Errorf("Wake with matching bitmask: got (%d, %v), wanted (1, nil)", n, err) - } - - // Expect that the waiter was woken. - if !w.woken() { - t.Error("waiter not woken") - } - }) - } -} - -func TestFutexWakeTwo(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(sizeofInt32) - - // Start three waiters waiting for wakeup. - var ws [3]*Waiter - for i := range ws { - ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i], d) - } - - // Perform two wakeups. - const wakeups = 2 - if n, err := m.Wake(d, 0, private, ^uint32(0), 2); err != nil || n != wakeups { - t.Errorf("Wake: got (%d, %v), wanted (%d, nil)", n, err, wakeups) - } - - // Expect that exactly two waiters were woken. - // We don't get guarantees about exactly which two, - // (although we expect them to be w1 and w2). - awake := 0 - for i := range ws { - if ws[i].woken() { - awake++ - } - } - if awake != wakeups { - t.Errorf("got %d woken waiters, wanted %d", awake, wakeups) - } - }) - } -} - -func TestFutexWakeUnrelated(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(2 * sizeofInt32) - - // Start two waiters waiting for wakeup on different addresses. - w1 := newPreparedTestWaiter(t, m, d, 0*sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, 1*sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Perform two wakeups on the second address. - if n, err := m.Wake(d, 1*sizeofInt32, private, ^uint32(0), 2); err != nil || n != 1 { - t.Errorf("Wake: got (%d, %v), wanted (1, nil)", n, err) - } - - // Expect that only the second waiter was woken. - if w1.woken() { - t.Error("w1 woken unexpectedly") - } - if !w2.woken() { - t.Error("w2 not woken") - } - }) - } -} - -func TestWakeOpEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(2 * sizeofInt32) - - // Perform wakeups with no waiters. - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 10, 0); err != nil || n != 0 { - t.Fatalf("WakeOp: got (%d, %v), wanted (0, nil)", n, err) - } - }) - } -} - -func TestWakeOpFirstNonEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address 0. - w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Perform 10 wakeups on address 0. - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 0, 0); err != nil || n != 2 { - t.Errorf("WakeOp: got (%d, %v), wanted (2, nil)", n, err) - } - - // Expect that both waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - }) - } -} - -func TestWakeOpSecondNonEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address sizeofInt32. - w1 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Perform 10 wakeups on address sizeofInt32 (contingent on - // d.Op(0), which should succeed). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 0, 10, 0); err != nil || n != 2 { - t.Errorf("WakeOp: got (%d, %v), wanted (2, nil)", n, err) - } - - // Expect that both waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - }) - } -} - -func TestWakeOpSecondNonEmptyFailingOp(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address sizeofInt32. - w1 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Perform 10 wakeups on address sizeofInt32 (contingent on - // d.Op(1), which should fail). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 0, 10, 1); err != nil || n != 0 { - t.Errorf("WakeOp: got (%d, %v), wanted (0, nil)", n, err) - } - - // Expect that neither waiter was woken. - if w1.woken() { - t.Error("w1 woken unexpectedly") - } - if w2.woken() { - t.Error("w2 woken unexpectedly") - } - }) - } -} - -func TestWakeOpAllNonEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address 0. - w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Add two waiters on address sizeofInt32. - w3 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w3, d) - w4 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w4, d) - - // Perform 10 wakeups on address 0 (unconditionally), and 10 - // wakeups on address sizeofInt32 (contingent on d.Op(0), which - // should succeed). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 10, 0); err != nil || n != 4 { - t.Errorf("WakeOp: got (%d, %v), wanted (4, nil)", n, err) - } - - // Expect that all waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - if !w3.woken() { - t.Error("w3 not woken") - } - if !w4.woken() { - t.Error("w4 not woken") - } - }) - } -} - -func TestWakeOpAllNonEmptyFailingOp(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address 0. - w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1, d) - w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2, d) - - // Add two waiters on address sizeofInt32. - w3 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w3, d) - w4 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w4, d) - - // Perform 10 wakeups on address 0 (unconditionally), and 10 - // wakeups on address sizeofInt32 (contingent on d.Op(1), which - // should fail). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 10, 1); err != nil || n != 2 { - t.Errorf("WakeOp: got (%d, %v), wanted (2, nil)", n, err) - } - - // Expect that only the first two waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - if w3.woken() { - t.Error("w3 woken unexpectedly") - } - if w4.woken() { - t.Error("w4 woken unexpectedly") - } - }) - } -} - -func TestWakeOpSameAddress(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add four waiters on address 0. - var ws [4]*Waiter - for i := range ws { - ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i], d) - } - - // Perform 1 wakeup on address 0 (unconditionally), and 1 wakeup - // on address 0 (contingent on d.Op(0), which should succeed). - const wakeups = 2 - if n, err := m.WakeOp(d, 0, 0, private, 1, 1, 0); err != nil || n != wakeups { - t.Errorf("WakeOp: got (%d, %v), wanted (%d, nil)", n, err, wakeups) - } - - // Expect that exactly two waiters were woken. - awake := 0 - for i := range ws { - if ws[i].woken() { - awake++ - } - } - if awake != wakeups { - t.Errorf("got %d woken waiters, wanted %d", awake, wakeups) - } - }) - } -} - -func TestWakeOpSameAddressFailingOp(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add four waiters on address 0. - var ws [4]*Waiter - for i := range ws { - ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i], d) - } - - // Perform 1 wakeup on address 0 (unconditionally), and 1 wakeup - // on address 0 (contingent on d.Op(1), which should fail). - const wakeups = 1 - if n, err := m.WakeOp(d, 0, 0, private, 1, 1, 1); err != nil || n != wakeups { - t.Errorf("WakeOp: got (%d, %v), wanted (%d, nil)", n, err, wakeups) - } - - // Expect that exactly one waiter was woken. - awake := 0 - for i := range ws { - if ws[i].woken() { - awake++ - } - } - if awake != wakeups { - t.Errorf("got %d woken waiters, wanted %d", awake, wakeups) - } - }) - } -} - -const ( - testMutexSize = sizeofInt32 - testMutexLocked uint32 = 1 - testMutexUnlocked uint32 = 0 -) - -// testMutex ties together a testData slice, an address, and a -// futex manager in order to implement the sync.Locker interface. -// Beyond being used as a Locker, this is a simple mechanism for -// changing the underlying values for simpler tests. -type testMutex struct { - a hostarch.Addr - d testData - m *Manager -} - -func newTestMutex(addr hostarch.Addr, d testData, m *Manager) *testMutex { - return &testMutex{a: addr, d: d, m: m} -} - -// Lock acquires the testMutex. -// This may wait for it to be available via the futex manager. -func (t *testMutex) Lock() { - for { - // Attempt to grab the lock. - if atomic.CompareAndSwapUint32( - (*uint32)(unsafe.Pointer(&t.d.data[t.a])), - testMutexUnlocked, - testMutexLocked) { - // Lock held. - return - } - - // Wait for it to be "not locked". - w := NewWaiter() - err := t.m.WaitPrepare(w, t.d, t.a, true, testMutexLocked, ^uint32(0)) - if linuxerr.Equals(linuxerr.EAGAIN, err) { - continue - } - if err != nil { - // Should never happen. - panic("WaitPrepare returned unexpected error: " + err.Error()) - } - <-w.C - t.m.WaitComplete(w, t.d) - } -} - -// Unlock releases the testMutex. -// This will notify any waiters via the futex manager. -func (t *testMutex) Unlock() { - // Unlock. - atomic.StoreUint32((*uint32)(unsafe.Pointer(&t.d.data[t.a])), testMutexUnlocked) - - // Notify all waiters. - t.m.Wake(t.d, t.a, true, ^uint32(0), math.MaxInt32) -} - -// This function was shamelessly stolen from mutex_test.go. -func HammerMutex(l sync.Locker, loops int, cdone chan bool) { - for i := 0; i < loops; i++ { - l.Lock() - runtime.Gosched() - l.Unlock() - } - cdone <- true -} - -func TestMutexStress(t *testing.T) { - m := NewManager() - d := newTestData(testMutexSize) - tm := newTestMutex(0*testMutexSize, d, m) - c := make(chan bool) - - for i := 0; i < 10; i++ { - go HammerMutex(tm, 1000, c) - } - - for i := 0; i < 10; i++ { - <-c - } -} diff --git a/pkg/sentry/kernel/futex/futex_unsafe_state_autogen.go b/pkg/sentry/kernel/futex/futex_unsafe_state_autogen.go new file mode 100644 index 000000000..84bb180fc --- /dev/null +++ b/pkg/sentry/kernel/futex/futex_unsafe_state_autogen.go @@ -0,0 +1,37 @@ +// automatically generated by stateify. + +package futex + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (p *AtomicPtrBucket) StateTypeName() string { + return "pkg/sentry/kernel/futex.AtomicPtrBucket" +} + +func (p *AtomicPtrBucket) StateFields() []string { + return []string{ + "ptr", + } +} + +func (p *AtomicPtrBucket) beforeSave() {} + +// +checklocksignore +func (p *AtomicPtrBucket) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + var ptrValue *bucket = p.savePtr() + stateSinkObject.SaveValue(0, ptrValue) +} + +func (p *AtomicPtrBucket) afterLoad() {} + +// +checklocksignore +func (p *AtomicPtrBucket) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*bucket), func(y interface{}) { p.loadPtr(y.(*bucket)) }) +} + +func init() { + state.Register((*AtomicPtrBucket)(nil)) +} diff --git a/pkg/sentry/kernel/futex/waiter_list.go b/pkg/sentry/kernel/futex/waiter_list.go new file mode 100644 index 000000000..24968ce4b --- /dev/null +++ b/pkg/sentry/kernel/futex/waiter_list.go @@ -0,0 +1,221 @@ +package futex + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type waiterElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (waiterElementMapper) linkerFor(elem *Waiter) *Waiter { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type waiterList struct { + head *Waiter + tail *Waiter +} + +// Reset resets list l to the empty state. +func (l *waiterList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *waiterList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *waiterList) Front() *Waiter { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *waiterList) Back() *Waiter { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *waiterList) Len() (count int) { + for e := l.Front(); e != nil; e = (waiterElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *waiterList) PushFront(e *Waiter) { + linker := waiterElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + waiterElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *waiterList) PushBack(e *Waiter) { + linker := waiterElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *waiterList) PushBackList(m *waiterList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(m.head) + waiterElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *waiterList) InsertAfter(b, e *Waiter) { + bLinker := waiterElementMapper{}.linkerFor(b) + eLinker := waiterElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + waiterElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *waiterList) InsertBefore(a, e *Waiter) { + aLinker := waiterElementMapper{}.linkerFor(a) + eLinker := waiterElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + waiterElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *waiterList) Remove(e *Waiter) { + linker := waiterElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + waiterElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + waiterElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type waiterEntry struct { + next *Waiter + prev *Waiter +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *waiterEntry) Next() *Waiter { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *waiterEntry) Prev() *Waiter { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *waiterEntry) SetNext(elem *Waiter) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *waiterEntry) SetPrev(elem *Waiter) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/g3doc/run_states.dot b/pkg/sentry/kernel/g3doc/run_states.dot deleted file mode 100644 index 7861fe1f5..000000000 --- a/pkg/sentry/kernel/g3doc/run_states.dot +++ /dev/null @@ -1,99 +0,0 @@ -digraph { - subgraph { - App; - } - subgraph { - Interrupt; - InterruptAfterSignalDeliveryStop; - } - subgraph { - Syscall; - SyscallAfterPtraceEventSeccomp; - SyscallEnter; - SyscallAfterSyscallEnterStop; - SyscallAfterSysemuStop; - SyscallInvoke; - SyscallAfterPtraceEventClone; - SyscallAfterExecStop; - SyscallAfterVforkStop; - SyscallReinvoke; - SyscallExit; - } - subgraph { - Vsyscall; - VsyscallAfterPtraceEventSeccomp; - VsyscallInvoke; - } - subgraph { - Exit; - ExitMain; // leave thread group, release resources, reparent children, kill PID namespace and wait if TGID 1 - ExitNotify; // signal parent/tracer, become waitable - ExitDone; // represented by t.runState == nil - } - - // Task exit - Exit -> ExitMain; - ExitMain -> ExitNotify; - ExitNotify -> ExitDone; - - // Execution of untrusted application code - App -> App; - - // Interrupts (usually signal delivery) - App -> Interrupt; - Interrupt -> Interrupt; // if other interrupt conditions may still apply - Interrupt -> Exit; // if killed - - // Syscalls - App -> Syscall; - Syscall -> SyscallEnter; - SyscallEnter -> SyscallInvoke; - SyscallInvoke -> SyscallExit; - SyscallExit -> App; - - // exit, exit_group - SyscallInvoke -> Exit; - - // execve - SyscallInvoke -> SyscallAfterExecStop; - SyscallAfterExecStop -> SyscallExit; - SyscallAfterExecStop -> App; // fatal signal pending - - // vfork - SyscallInvoke -> SyscallAfterVforkStop; - SyscallAfterVforkStop -> SyscallExit; - - // Vsyscalls - App -> Vsyscall; - Vsyscall -> VsyscallInvoke; - Vsyscall -> App; // fault while reading return address from stack - VsyscallInvoke -> App; - - // ptrace-specific branches - Interrupt -> InterruptAfterSignalDeliveryStop; - InterruptAfterSignalDeliveryStop -> Interrupt; - SyscallEnter -> SyscallAfterSyscallEnterStop; - SyscallAfterSyscallEnterStop -> SyscallInvoke; - SyscallAfterSyscallEnterStop -> SyscallExit; // skipped by tracer - SyscallAfterSyscallEnterStop -> App; // fatal signal pending - SyscallEnter -> SyscallAfterSysemuStop; - SyscallAfterSysemuStop -> SyscallExit; - SyscallAfterSysemuStop -> App; // fatal signal pending - SyscallInvoke -> SyscallAfterPtraceEventClone; - SyscallAfterPtraceEventClone -> SyscallExit; - SyscallAfterPtraceEventClone -> SyscallAfterVforkStop; - - // seccomp - Syscall -> App; // SECCOMP_RET_TRAP, SECCOMP_RET_ERRNO, SECCOMP_RET_KILL, SECCOMP_RET_TRACE without tracer - Syscall -> SyscallAfterPtraceEventSeccomp; // SECCOMP_RET_TRACE - SyscallAfterPtraceEventSeccomp -> SyscallEnter; - SyscallAfterPtraceEventSeccomp -> SyscallExit; // skipped by tracer - SyscallAfterPtraceEventSeccomp -> App; // fatal signal pending - Vsyscall -> VsyscallAfterPtraceEventSeccomp; - VsyscallAfterPtraceEventSeccomp -> VsyscallInvoke; - VsyscallAfterPtraceEventSeccomp -> App; - - // Autosave - SyscallInvoke -> SyscallReinvoke; - SyscallReinvoke -> SyscallInvoke; -} diff --git a/pkg/sentry/kernel/g3doc/run_states.png b/pkg/sentry/kernel/g3doc/run_states.png Binary files differdeleted file mode 100644 index b63b60f02..000000000 --- a/pkg/sentry/kernel/g3doc/run_states.png +++ /dev/null diff --git a/pkg/sentry/kernel/ipc/BUILD b/pkg/sentry/kernel/ipc/BUILD deleted file mode 100644 index e42a94e15..000000000 --- a/pkg/sentry/kernel/ipc/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "ipc", - srcs = [ - "object.go", - "registry.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/log", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - ], -) diff --git a/pkg/sentry/kernel/ipc/ipc_state_autogen.go b/pkg/sentry/kernel/ipc/ipc_state_autogen.go new file mode 100644 index 000000000..b74f23a21 --- /dev/null +++ b/pkg/sentry/kernel/ipc/ipc_state_autogen.go @@ -0,0 +1,86 @@ +// automatically generated by stateify. + +package ipc + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (o *Object) StateTypeName() string { + return "pkg/sentry/kernel/ipc.Object" +} + +func (o *Object) StateFields() []string { + return []string{ + "UserNS", + "ID", + "Key", + "Creator", + "Owner", + "Perms", + } +} + +func (o *Object) beforeSave() {} + +// +checklocksignore +func (o *Object) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.UserNS) + stateSinkObject.Save(1, &o.ID) + stateSinkObject.Save(2, &o.Key) + stateSinkObject.Save(3, &o.Creator) + stateSinkObject.Save(4, &o.Owner) + stateSinkObject.Save(5, &o.Perms) +} + +func (o *Object) afterLoad() {} + +// +checklocksignore +func (o *Object) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.UserNS) + stateSourceObject.Load(1, &o.ID) + stateSourceObject.Load(2, &o.Key) + stateSourceObject.Load(3, &o.Creator) + stateSourceObject.Load(4, &o.Owner) + stateSourceObject.Load(5, &o.Perms) +} + +func (r *Registry) StateTypeName() string { + return "pkg/sentry/kernel/ipc.Registry" +} + +func (r *Registry) StateFields() []string { + return []string{ + "UserNS", + "objects", + "keysToIDs", + "lastIDUsed", + } +} + +func (r *Registry) beforeSave() {} + +// +checklocksignore +func (r *Registry) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.UserNS) + stateSinkObject.Save(1, &r.objects) + stateSinkObject.Save(2, &r.keysToIDs) + stateSinkObject.Save(3, &r.lastIDUsed) +} + +func (r *Registry) afterLoad() {} + +// +checklocksignore +func (r *Registry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.UserNS) + stateSourceObject.Load(1, &r.objects) + stateSourceObject.Load(2, &r.keysToIDs) + stateSourceObject.Load(3, &r.lastIDUsed) +} + +func init() { + state.Register((*Object)(nil)) + state.Register((*Registry)(nil)) +} diff --git a/pkg/sentry/kernel/ipc_namespace_refs.go b/pkg/sentry/kernel/ipc_namespace_refs.go new file mode 100644 index 000000000..1a4c31bb0 --- /dev/null +++ b/pkg/sentry/kernel/ipc_namespace_refs.go @@ -0,0 +1,140 @@ +package kernel + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const IPCNamespaceenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var IPCNamespaceobj *IPCNamespace + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type IPCNamespaceRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *IPCNamespaceRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *IPCNamespaceRefs) RefType() string { + return fmt.Sprintf("%T", IPCNamespaceobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *IPCNamespaceRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *IPCNamespaceRefs) LogRefs() bool { + return IPCNamespaceenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *IPCNamespaceRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *IPCNamespaceRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if IPCNamespaceenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *IPCNamespaceRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if IPCNamespaceenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *IPCNamespaceRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if IPCNamespaceenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *IPCNamespaceRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go new file mode 100644 index 000000000..83ed0f8ac --- /dev/null +++ b/pkg/sentry/kernel/kernel_abi_autogen_unsafe.go @@ -0,0 +1,224 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package kernel + +import ( + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal" + "io" + "reflect" + "runtime" + "unsafe" +) + +// Marshallable types used by this file. +var _ marshal.Marshallable = (*ThreadID)(nil) +var _ marshal.Marshallable = (*vdsoParams)(nil) + +// SizeBytes implements marshal.Marshallable.SizeBytes. +//go:nosplit +func (tid *ThreadID) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (tid *ThreadID) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(*tid)) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (tid *ThreadID) UnmarshalBytes(src []byte) { + *tid = ThreadID(int32(hostarch.ByteOrder.Uint32(src[:4]))) +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (tid *ThreadID) Packed() bool { + // Scalar newtypes are always packed. + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (tid *ThreadID) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(tid), uintptr(tid.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (tid *ThreadID) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(tid), unsafe.Pointer(&src[0]), uintptr(tid.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (tid *ThreadID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(tid))) + hdr.Len = tid.SizeBytes() + hdr.Cap = tid.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that tid + // must live until the use above. + runtime.KeepAlive(tid) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (tid *ThreadID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return tid.CopyOutN(cc, addr, tid.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (tid *ThreadID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(tid))) + hdr.Len = tid.SizeBytes() + hdr.Cap = tid.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that tid + // must live until the use above. + runtime.KeepAlive(tid) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (tid *ThreadID) WriteTo(w io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(tid))) + hdr.Len = tid.SizeBytes() + hdr.Cap = tid.SizeBytes() + + length, err := w.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that tid + // must live until the use above. + runtime.KeepAlive(tid) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (v *vdsoParams) SizeBytes() int { + return 64 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (v *vdsoParams) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicReady)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicBaseCycles)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicBaseRef)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.monotonicFrequency)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeReady)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeBaseCycles)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeBaseRef)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(v.realtimeFrequency)) + dst = dst[8:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (v *vdsoParams) UnmarshalBytes(src []byte) { + v.monotonicReady = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.monotonicBaseCycles = int64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.monotonicBaseRef = int64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.monotonicFrequency = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeReady = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeBaseCycles = int64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeBaseRef = int64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + v.realtimeFrequency = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (v *vdsoParams) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (v *vdsoParams) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(v), uintptr(v.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (v *vdsoParams) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(v), unsafe.Pointer(&src[0]), uintptr(v.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (v *vdsoParams) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v))) + hdr.Len = v.SizeBytes() + hdr.Cap = v.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that v + // must live until the use above. + runtime.KeepAlive(v) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (v *vdsoParams) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return v.CopyOutN(cc, addr, v.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (v *vdsoParams) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v))) + hdr.Len = v.SizeBytes() + hdr.Cap = v.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that v + // must live until the use above. + runtime.KeepAlive(v) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (v *vdsoParams) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(v))) + hdr.Len = v.SizeBytes() + hdr.Cap = v.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that v + // must live until the use above. + runtime.KeepAlive(v) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/sentry/kernel/kernel_amd64_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_amd64_abi_autogen_unsafe.go new file mode 100644 index 000000000..988351acf --- /dev/null +++ b/pkg/sentry/kernel/kernel_amd64_abi_autogen_unsafe.go @@ -0,0 +1,16 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build amd64 +// +build amd64 + +package kernel + +import ( +) + diff --git a/pkg/sentry/kernel/kernel_amd64_state_autogen.go b/pkg/sentry/kernel/kernel_amd64_state_autogen.go new file mode 100644 index 000000000..859999676 --- /dev/null +++ b/pkg/sentry/kernel/kernel_amd64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build amd64 +// +build amd64 + +package kernel diff --git a/pkg/sentry/kernel/kernel_arm64_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_arm64_abi_autogen_unsafe.go new file mode 100644 index 000000000..823d08b19 --- /dev/null +++ b/pkg/sentry/kernel/kernel_arm64_abi_autogen_unsafe.go @@ -0,0 +1,16 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build arm64 +// +build arm64 + +package kernel + +import ( +) + diff --git a/pkg/sentry/kernel/kernel_arm64_state_autogen.go b/pkg/sentry/kernel/kernel_arm64_state_autogen.go new file mode 100644 index 000000000..7340674a6 --- /dev/null +++ b/pkg/sentry/kernel/kernel_arm64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 +// +build arm64 + +package kernel diff --git a/pkg/sentry/kernel/kernel_opts_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_opts_abi_autogen_unsafe.go new file mode 100644 index 000000000..dcca4b9dd --- /dev/null +++ b/pkg/sentry/kernel/kernel_opts_abi_autogen_unsafe.go @@ -0,0 +1,16 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build go1.1 +// +build go1.1 + +package kernel + +import ( +) + diff --git a/pkg/sentry/kernel/kernel_opts_state_autogen.go b/pkg/sentry/kernel/kernel_opts_state_autogen.go new file mode 100644 index 000000000..d878d9a06 --- /dev/null +++ b/pkg/sentry/kernel/kernel_opts_state_autogen.go @@ -0,0 +1,35 @@ +// automatically generated by stateify. + +//go:build go1.1 +// +build go1.1 + +package kernel + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *SpecialOpts) StateTypeName() string { + return "pkg/sentry/kernel.SpecialOpts" +} + +func (s *SpecialOpts) StateFields() []string { + return []string{} +} + +func (s *SpecialOpts) beforeSave() {} + +// +checklocksignore +func (s *SpecialOpts) StateSave(stateSinkObject state.Sink) { + s.beforeSave() +} + +func (s *SpecialOpts) afterLoad() {} + +// +checklocksignore +func (s *SpecialOpts) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*SpecialOpts)(nil)) +} diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go new file mode 100644 index 000000000..dd4ecfe0f --- /dev/null +++ b/pkg/sentry/kernel/kernel_state_autogen.go @@ -0,0 +1,2602 @@ +// automatically generated by stateify. + +package kernel + +import ( + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/tcpip" +) + +func (a *abstractEndpoint) StateTypeName() string { + return "pkg/sentry/kernel.abstractEndpoint" +} + +func (a *abstractEndpoint) StateFields() []string { + return []string{ + "ep", + "socket", + "name", + "ns", + } +} + +func (a *abstractEndpoint) beforeSave() {} + +// +checklocksignore +func (a *abstractEndpoint) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.ep) + stateSinkObject.Save(1, &a.socket) + stateSinkObject.Save(2, &a.name) + stateSinkObject.Save(3, &a.ns) +} + +func (a *abstractEndpoint) afterLoad() {} + +// +checklocksignore +func (a *abstractEndpoint) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.ep) + stateSourceObject.Load(1, &a.socket) + stateSourceObject.Load(2, &a.name) + stateSourceObject.Load(3, &a.ns) +} + +func (a *AbstractSocketNamespace) StateTypeName() string { + return "pkg/sentry/kernel.AbstractSocketNamespace" +} + +func (a *AbstractSocketNamespace) StateFields() []string { + return []string{ + "endpoints", + } +} + +func (a *AbstractSocketNamespace) beforeSave() {} + +// +checklocksignore +func (a *AbstractSocketNamespace) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.endpoints) +} + +func (a *AbstractSocketNamespace) afterLoad() {} + +// +checklocksignore +func (a *AbstractSocketNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.endpoints) +} + +func (c *Cgroup) StateTypeName() string { + return "pkg/sentry/kernel.Cgroup" +} + +func (c *Cgroup) StateFields() []string { + return []string{ + "Dentry", + "CgroupImpl", + } +} + +func (c *Cgroup) beforeSave() {} + +// +checklocksignore +func (c *Cgroup) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.Dentry) + stateSinkObject.Save(1, &c.CgroupImpl) +} + +func (c *Cgroup) afterLoad() {} + +// +checklocksignore +func (c *Cgroup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.Dentry) + stateSourceObject.Load(1, &c.CgroupImpl) +} + +func (h *hierarchy) StateTypeName() string { + return "pkg/sentry/kernel.hierarchy" +} + +func (h *hierarchy) StateFields() []string { + return []string{ + "id", + "controllers", + "fs", + } +} + +func (h *hierarchy) beforeSave() {} + +// +checklocksignore +func (h *hierarchy) StateSave(stateSinkObject state.Sink) { + h.beforeSave() + stateSinkObject.Save(0, &h.id) + stateSinkObject.Save(1, &h.controllers) + stateSinkObject.Save(2, &h.fs) +} + +func (h *hierarchy) afterLoad() {} + +// +checklocksignore +func (h *hierarchy) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &h.id) + stateSourceObject.Load(1, &h.controllers) + stateSourceObject.Load(2, &h.fs) +} + +func (r *CgroupRegistry) StateTypeName() string { + return "pkg/sentry/kernel.CgroupRegistry" +} + +func (r *CgroupRegistry) StateFields() []string { + return []string{ + "lastHierarchyID", + "controllers", + "hierarchies", + } +} + +func (r *CgroupRegistry) beforeSave() {} + +// +checklocksignore +func (r *CgroupRegistry) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.lastHierarchyID) + stateSinkObject.Save(1, &r.controllers) + stateSinkObject.Save(2, &r.hierarchies) +} + +func (r *CgroupRegistry) afterLoad() {} + +// +checklocksignore +func (r *CgroupRegistry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.lastHierarchyID) + stateSourceObject.Load(1, &r.controllers) + stateSourceObject.Load(2, &r.hierarchies) +} + +func (f *FDFlags) StateTypeName() string { + return "pkg/sentry/kernel.FDFlags" +} + +func (f *FDFlags) StateFields() []string { + return []string{ + "CloseOnExec", + } +} + +func (f *FDFlags) beforeSave() {} + +// +checklocksignore +func (f *FDFlags) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.CloseOnExec) +} + +func (f *FDFlags) afterLoad() {} + +// +checklocksignore +func (f *FDFlags) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.CloseOnExec) +} + +func (d *descriptor) StateTypeName() string { + return "pkg/sentry/kernel.descriptor" +} + +func (d *descriptor) StateFields() []string { + return []string{ + "file", + "fileVFS2", + "flags", + } +} + +func (d *descriptor) beforeSave() {} + +// +checklocksignore +func (d *descriptor) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.file) + stateSinkObject.Save(1, &d.fileVFS2) + stateSinkObject.Save(2, &d.flags) +} + +func (d *descriptor) afterLoad() {} + +// +checklocksignore +func (d *descriptor) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.file) + stateSourceObject.Load(1, &d.fileVFS2) + stateSourceObject.Load(2, &d.flags) +} + +func (f *FDTable) StateTypeName() string { + return "pkg/sentry/kernel.FDTable" +} + +func (f *FDTable) StateFields() []string { + return []string{ + "FDTableRefs", + "k", + "next", + "used", + "descriptorTable", + } +} + +func (f *FDTable) beforeSave() {} + +// +checklocksignore +func (f *FDTable) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + var descriptorTableValue map[int32]descriptor = f.saveDescriptorTable() + stateSinkObject.SaveValue(4, descriptorTableValue) + stateSinkObject.Save(0, &f.FDTableRefs) + stateSinkObject.Save(1, &f.k) + stateSinkObject.Save(2, &f.next) + stateSinkObject.Save(3, &f.used) +} + +func (f *FDTable) afterLoad() {} + +// +checklocksignore +func (f *FDTable) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.FDTableRefs) + stateSourceObject.Load(1, &f.k) + stateSourceObject.Load(2, &f.next) + stateSourceObject.Load(3, &f.used) + stateSourceObject.LoadValue(4, new(map[int32]descriptor), func(y interface{}) { f.loadDescriptorTable(y.(map[int32]descriptor)) }) +} + +func (r *FDTableRefs) StateTypeName() string { + return "pkg/sentry/kernel.FDTableRefs" +} + +func (r *FDTableRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *FDTableRefs) beforeSave() {} + +// +checklocksignore +func (r *FDTableRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *FDTableRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (f *FSContext) StateTypeName() string { + return "pkg/sentry/kernel.FSContext" +} + +func (f *FSContext) StateFields() []string { + return []string{ + "FSContextRefs", + "root", + "rootVFS2", + "cwd", + "cwdVFS2", + "umask", + } +} + +func (f *FSContext) beforeSave() {} + +// +checklocksignore +func (f *FSContext) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.FSContextRefs) + stateSinkObject.Save(1, &f.root) + stateSinkObject.Save(2, &f.rootVFS2) + stateSinkObject.Save(3, &f.cwd) + stateSinkObject.Save(4, &f.cwdVFS2) + stateSinkObject.Save(5, &f.umask) +} + +func (f *FSContext) afterLoad() {} + +// +checklocksignore +func (f *FSContext) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.FSContextRefs) + stateSourceObject.Load(1, &f.root) + stateSourceObject.Load(2, &f.rootVFS2) + stateSourceObject.Load(3, &f.cwd) + stateSourceObject.Load(4, &f.cwdVFS2) + stateSourceObject.Load(5, &f.umask) +} + +func (r *FSContextRefs) StateTypeName() string { + return "pkg/sentry/kernel.FSContextRefs" +} + +func (r *FSContextRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *FSContextRefs) beforeSave() {} + +// +checklocksignore +func (r *FSContextRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *FSContextRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (i *IPCNamespace) StateTypeName() string { + return "pkg/sentry/kernel.IPCNamespace" +} + +func (i *IPCNamespace) StateFields() []string { + return []string{ + "IPCNamespaceRefs", + "userNS", + "queues", + "semaphores", + "shms", + } +} + +func (i *IPCNamespace) beforeSave() {} + +// +checklocksignore +func (i *IPCNamespace) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.IPCNamespaceRefs) + stateSinkObject.Save(1, &i.userNS) + stateSinkObject.Save(2, &i.queues) + stateSinkObject.Save(3, &i.semaphores) + stateSinkObject.Save(4, &i.shms) +} + +func (i *IPCNamespace) afterLoad() {} + +// +checklocksignore +func (i *IPCNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.IPCNamespaceRefs) + stateSourceObject.Load(1, &i.userNS) + stateSourceObject.Load(2, &i.queues) + stateSourceObject.Load(3, &i.semaphores) + stateSourceObject.Load(4, &i.shms) +} + +func (r *IPCNamespaceRefs) StateTypeName() string { + return "pkg/sentry/kernel.IPCNamespaceRefs" +} + +func (r *IPCNamespaceRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *IPCNamespaceRefs) beforeSave() {} + +// +checklocksignore +func (r *IPCNamespaceRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *IPCNamespaceRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (k *Kernel) StateTypeName() string { + return "pkg/sentry/kernel.Kernel" +} + +func (k *Kernel) StateFields() []string { + return []string{ + "featureSet", + "timekeeper", + "tasks", + "rootUserNamespace", + "rootNetworkNamespace", + "applicationCores", + "useHostCores", + "extraAuxv", + "vdso", + "rootUTSNamespace", + "rootIPCNamespace", + "rootAbstractSocketNamespace", + "futexes", + "globalInit", + "syslog", + "runningTasks", + "cpuClock", + "cpuClockTickerDisabled", + "cpuClockTickerSetting", + "uniqueID", + "nextInotifyCookie", + "netlinkPorts", + "danglingEndpoints", + "sockets", + "socketsVFS2", + "nextSocketRecord", + "deviceRegistry", + "DirentCacheLimiter", + "SpecialOpts", + "vfs", + "hostMount", + "pipeMount", + "shmMount", + "socketMount", + "SleepForAddressSpaceActivation", + "ptraceExceptions", + "YAMAPtraceScope", + "cgroupRegistry", + } +} + +func (k *Kernel) beforeSave() {} + +// +checklocksignore +func (k *Kernel) StateSave(stateSinkObject state.Sink) { + k.beforeSave() + var danglingEndpointsValue []tcpip.Endpoint = k.saveDanglingEndpoints() + stateSinkObject.SaveValue(22, danglingEndpointsValue) + var deviceRegistryValue *device.Registry = k.saveDeviceRegistry() + stateSinkObject.SaveValue(26, deviceRegistryValue) + stateSinkObject.Save(0, &k.featureSet) + stateSinkObject.Save(1, &k.timekeeper) + stateSinkObject.Save(2, &k.tasks) + stateSinkObject.Save(3, &k.rootUserNamespace) + stateSinkObject.Save(4, &k.rootNetworkNamespace) + stateSinkObject.Save(5, &k.applicationCores) + stateSinkObject.Save(6, &k.useHostCores) + stateSinkObject.Save(7, &k.extraAuxv) + stateSinkObject.Save(8, &k.vdso) + stateSinkObject.Save(9, &k.rootUTSNamespace) + stateSinkObject.Save(10, &k.rootIPCNamespace) + stateSinkObject.Save(11, &k.rootAbstractSocketNamespace) + stateSinkObject.Save(12, &k.futexes) + stateSinkObject.Save(13, &k.globalInit) + stateSinkObject.Save(14, &k.syslog) + stateSinkObject.Save(15, &k.runningTasks) + stateSinkObject.Save(16, &k.cpuClock) + stateSinkObject.Save(17, &k.cpuClockTickerDisabled) + stateSinkObject.Save(18, &k.cpuClockTickerSetting) + stateSinkObject.Save(19, &k.uniqueID) + stateSinkObject.Save(20, &k.nextInotifyCookie) + stateSinkObject.Save(21, &k.netlinkPorts) + stateSinkObject.Save(23, &k.sockets) + stateSinkObject.Save(24, &k.socketsVFS2) + stateSinkObject.Save(25, &k.nextSocketRecord) + stateSinkObject.Save(27, &k.DirentCacheLimiter) + stateSinkObject.Save(28, &k.SpecialOpts) + stateSinkObject.Save(29, &k.vfs) + stateSinkObject.Save(30, &k.hostMount) + stateSinkObject.Save(31, &k.pipeMount) + stateSinkObject.Save(32, &k.shmMount) + stateSinkObject.Save(33, &k.socketMount) + stateSinkObject.Save(34, &k.SleepForAddressSpaceActivation) + stateSinkObject.Save(35, &k.ptraceExceptions) + stateSinkObject.Save(36, &k.YAMAPtraceScope) + stateSinkObject.Save(37, &k.cgroupRegistry) +} + +func (k *Kernel) afterLoad() {} + +// +checklocksignore +func (k *Kernel) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &k.featureSet) + stateSourceObject.Load(1, &k.timekeeper) + stateSourceObject.Load(2, &k.tasks) + stateSourceObject.Load(3, &k.rootUserNamespace) + stateSourceObject.Load(4, &k.rootNetworkNamespace) + stateSourceObject.Load(5, &k.applicationCores) + stateSourceObject.Load(6, &k.useHostCores) + stateSourceObject.Load(7, &k.extraAuxv) + stateSourceObject.Load(8, &k.vdso) + stateSourceObject.Load(9, &k.rootUTSNamespace) + stateSourceObject.Load(10, &k.rootIPCNamespace) + stateSourceObject.Load(11, &k.rootAbstractSocketNamespace) + stateSourceObject.Load(12, &k.futexes) + stateSourceObject.Load(13, &k.globalInit) + stateSourceObject.Load(14, &k.syslog) + stateSourceObject.Load(15, &k.runningTasks) + stateSourceObject.Load(16, &k.cpuClock) + stateSourceObject.Load(17, &k.cpuClockTickerDisabled) + stateSourceObject.Load(18, &k.cpuClockTickerSetting) + stateSourceObject.Load(19, &k.uniqueID) + stateSourceObject.Load(20, &k.nextInotifyCookie) + stateSourceObject.Load(21, &k.netlinkPorts) + stateSourceObject.Load(23, &k.sockets) + stateSourceObject.Load(24, &k.socketsVFS2) + stateSourceObject.Load(25, &k.nextSocketRecord) + stateSourceObject.Load(27, &k.DirentCacheLimiter) + stateSourceObject.Load(28, &k.SpecialOpts) + stateSourceObject.Load(29, &k.vfs) + stateSourceObject.Load(30, &k.hostMount) + stateSourceObject.Load(31, &k.pipeMount) + stateSourceObject.Load(32, &k.shmMount) + stateSourceObject.Load(33, &k.socketMount) + stateSourceObject.Load(34, &k.SleepForAddressSpaceActivation) + stateSourceObject.Load(35, &k.ptraceExceptions) + stateSourceObject.Load(36, &k.YAMAPtraceScope) + stateSourceObject.Load(37, &k.cgroupRegistry) + stateSourceObject.LoadValue(22, new([]tcpip.Endpoint), func(y interface{}) { k.loadDanglingEndpoints(y.([]tcpip.Endpoint)) }) + stateSourceObject.LoadValue(26, new(*device.Registry), func(y interface{}) { k.loadDeviceRegistry(y.(*device.Registry)) }) +} + +func (s *SocketRecord) StateTypeName() string { + return "pkg/sentry/kernel.SocketRecord" +} + +func (s *SocketRecord) StateFields() []string { + return []string{ + "k", + "Sock", + "SockVFS2", + "ID", + } +} + +func (s *SocketRecord) beforeSave() {} + +// +checklocksignore +func (s *SocketRecord) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.k) + stateSinkObject.Save(1, &s.Sock) + stateSinkObject.Save(2, &s.SockVFS2) + stateSinkObject.Save(3, &s.ID) +} + +func (s *SocketRecord) afterLoad() {} + +// +checklocksignore +func (s *SocketRecord) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.k) + stateSourceObject.Load(1, &s.Sock) + stateSourceObject.Load(2, &s.SockVFS2) + stateSourceObject.Load(3, &s.ID) +} + +func (s *SocketRecordVFS1) StateTypeName() string { + return "pkg/sentry/kernel.SocketRecordVFS1" +} + +func (s *SocketRecordVFS1) StateFields() []string { + return []string{ + "socketEntry", + "SocketRecord", + } +} + +func (s *SocketRecordVFS1) beforeSave() {} + +// +checklocksignore +func (s *SocketRecordVFS1) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.socketEntry) + stateSinkObject.Save(1, &s.SocketRecord) +} + +func (s *SocketRecordVFS1) afterLoad() {} + +// +checklocksignore +func (s *SocketRecordVFS1) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.socketEntry) + stateSourceObject.Load(1, &s.SocketRecord) +} + +func (p *pendingSignals) StateTypeName() string { + return "pkg/sentry/kernel.pendingSignals" +} + +func (p *pendingSignals) StateFields() []string { + return []string{ + "signals", + } +} + +func (p *pendingSignals) beforeSave() {} + +// +checklocksignore +func (p *pendingSignals) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + var signalsValue []savedPendingSignal = p.saveSignals() + stateSinkObject.SaveValue(0, signalsValue) +} + +func (p *pendingSignals) afterLoad() {} + +// +checklocksignore +func (p *pendingSignals) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new([]savedPendingSignal), func(y interface{}) { p.loadSignals(y.([]savedPendingSignal)) }) +} + +func (p *pendingSignalQueue) StateTypeName() string { + return "pkg/sentry/kernel.pendingSignalQueue" +} + +func (p *pendingSignalQueue) StateFields() []string { + return []string{ + "pendingSignalList", + "length", + } +} + +func (p *pendingSignalQueue) beforeSave() {} + +// +checklocksignore +func (p *pendingSignalQueue) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.pendingSignalList) + stateSinkObject.Save(1, &p.length) +} + +func (p *pendingSignalQueue) afterLoad() {} + +// +checklocksignore +func (p *pendingSignalQueue) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.pendingSignalList) + stateSourceObject.Load(1, &p.length) +} + +func (p *pendingSignal) StateTypeName() string { + return "pkg/sentry/kernel.pendingSignal" +} + +func (p *pendingSignal) StateFields() []string { + return []string{ + "pendingSignalEntry", + "SignalInfo", + "timer", + } +} + +func (p *pendingSignal) beforeSave() {} + +// +checklocksignore +func (p *pendingSignal) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.pendingSignalEntry) + stateSinkObject.Save(1, &p.SignalInfo) + stateSinkObject.Save(2, &p.timer) +} + +func (p *pendingSignal) afterLoad() {} + +// +checklocksignore +func (p *pendingSignal) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.pendingSignalEntry) + stateSourceObject.Load(1, &p.SignalInfo) + stateSourceObject.Load(2, &p.timer) +} + +func (l *pendingSignalList) StateTypeName() string { + return "pkg/sentry/kernel.pendingSignalList" +} + +func (l *pendingSignalList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *pendingSignalList) beforeSave() {} + +// +checklocksignore +func (l *pendingSignalList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *pendingSignalList) afterLoad() {} + +// +checklocksignore +func (l *pendingSignalList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *pendingSignalEntry) StateTypeName() string { + return "pkg/sentry/kernel.pendingSignalEntry" +} + +func (e *pendingSignalEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *pendingSignalEntry) beforeSave() {} + +// +checklocksignore +func (e *pendingSignalEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *pendingSignalEntry) afterLoad() {} + +// +checklocksignore +func (e *pendingSignalEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (s *savedPendingSignal) StateTypeName() string { + return "pkg/sentry/kernel.savedPendingSignal" +} + +func (s *savedPendingSignal) StateFields() []string { + return []string{ + "si", + "timer", + } +} + +func (s *savedPendingSignal) beforeSave() {} + +// +checklocksignore +func (s *savedPendingSignal) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.si) + stateSinkObject.Save(1, &s.timer) +} + +func (s *savedPendingSignal) afterLoad() {} + +// +checklocksignore +func (s *savedPendingSignal) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.si) + stateSourceObject.Load(1, &s.timer) +} + +func (it *IntervalTimer) StateTypeName() string { + return "pkg/sentry/kernel.IntervalTimer" +} + +func (it *IntervalTimer) StateFields() []string { + return []string{ + "timer", + "target", + "signo", + "id", + "sigval", + "group", + "sigpending", + "sigorphan", + "overrunCur", + "overrunLast", + } +} + +func (it *IntervalTimer) beforeSave() {} + +// +checklocksignore +func (it *IntervalTimer) StateSave(stateSinkObject state.Sink) { + it.beforeSave() + stateSinkObject.Save(0, &it.timer) + stateSinkObject.Save(1, &it.target) + stateSinkObject.Save(2, &it.signo) + stateSinkObject.Save(3, &it.id) + stateSinkObject.Save(4, &it.sigval) + stateSinkObject.Save(5, &it.group) + stateSinkObject.Save(6, &it.sigpending) + stateSinkObject.Save(7, &it.sigorphan) + stateSinkObject.Save(8, &it.overrunCur) + stateSinkObject.Save(9, &it.overrunLast) +} + +func (it *IntervalTimer) afterLoad() {} + +// +checklocksignore +func (it *IntervalTimer) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &it.timer) + stateSourceObject.Load(1, &it.target) + stateSourceObject.Load(2, &it.signo) + stateSourceObject.Load(3, &it.id) + stateSourceObject.Load(4, &it.sigval) + stateSourceObject.Load(5, &it.group) + stateSourceObject.Load(6, &it.sigpending) + stateSourceObject.Load(7, &it.sigorphan) + stateSourceObject.Load(8, &it.overrunCur) + stateSourceObject.Load(9, &it.overrunLast) +} + +func (l *processGroupList) StateTypeName() string { + return "pkg/sentry/kernel.processGroupList" +} + +func (l *processGroupList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *processGroupList) beforeSave() {} + +// +checklocksignore +func (l *processGroupList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *processGroupList) afterLoad() {} + +// +checklocksignore +func (l *processGroupList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *processGroupEntry) StateTypeName() string { + return "pkg/sentry/kernel.processGroupEntry" +} + +func (e *processGroupEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *processGroupEntry) beforeSave() {} + +// +checklocksignore +func (e *processGroupEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *processGroupEntry) afterLoad() {} + +// +checklocksignore +func (e *processGroupEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (r *ProcessGroupRefs) StateTypeName() string { + return "pkg/sentry/kernel.ProcessGroupRefs" +} + +func (r *ProcessGroupRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *ProcessGroupRefs) beforeSave() {} + +// +checklocksignore +func (r *ProcessGroupRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *ProcessGroupRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (p *ptraceOptions) StateTypeName() string { + return "pkg/sentry/kernel.ptraceOptions" +} + +func (p *ptraceOptions) StateFields() []string { + return []string{ + "ExitKill", + "SysGood", + "TraceClone", + "TraceExec", + "TraceExit", + "TraceFork", + "TraceSeccomp", + "TraceVfork", + "TraceVforkDone", + } +} + +func (p *ptraceOptions) beforeSave() {} + +// +checklocksignore +func (p *ptraceOptions) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.ExitKill) + stateSinkObject.Save(1, &p.SysGood) + stateSinkObject.Save(2, &p.TraceClone) + stateSinkObject.Save(3, &p.TraceExec) + stateSinkObject.Save(4, &p.TraceExit) + stateSinkObject.Save(5, &p.TraceFork) + stateSinkObject.Save(6, &p.TraceSeccomp) + stateSinkObject.Save(7, &p.TraceVfork) + stateSinkObject.Save(8, &p.TraceVforkDone) +} + +func (p *ptraceOptions) afterLoad() {} + +// +checklocksignore +func (p *ptraceOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.ExitKill) + stateSourceObject.Load(1, &p.SysGood) + stateSourceObject.Load(2, &p.TraceClone) + stateSourceObject.Load(3, &p.TraceExec) + stateSourceObject.Load(4, &p.TraceExit) + stateSourceObject.Load(5, &p.TraceFork) + stateSourceObject.Load(6, &p.TraceSeccomp) + stateSourceObject.Load(7, &p.TraceVfork) + stateSourceObject.Load(8, &p.TraceVforkDone) +} + +func (s *ptraceStop) StateTypeName() string { + return "pkg/sentry/kernel.ptraceStop" +} + +func (s *ptraceStop) StateFields() []string { + return []string{ + "frozen", + "listen", + } +} + +func (s *ptraceStop) beforeSave() {} + +// +checklocksignore +func (s *ptraceStop) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.frozen) + stateSinkObject.Save(1, &s.listen) +} + +func (s *ptraceStop) afterLoad() {} + +// +checklocksignore +func (s *ptraceStop) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.frozen) + stateSourceObject.Load(1, &s.listen) +} + +func (o *OldRSeqCriticalRegion) StateTypeName() string { + return "pkg/sentry/kernel.OldRSeqCriticalRegion" +} + +func (o *OldRSeqCriticalRegion) StateFields() []string { + return []string{ + "CriticalSection", + "Restart", + } +} + +func (o *OldRSeqCriticalRegion) beforeSave() {} + +// +checklocksignore +func (o *OldRSeqCriticalRegion) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.CriticalSection) + stateSinkObject.Save(1, &o.Restart) +} + +func (o *OldRSeqCriticalRegion) afterLoad() {} + +// +checklocksignore +func (o *OldRSeqCriticalRegion) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.CriticalSection) + stateSourceObject.Load(1, &o.Restart) +} + +func (l *sessionList) StateTypeName() string { + return "pkg/sentry/kernel.sessionList" +} + +func (l *sessionList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *sessionList) beforeSave() {} + +// +checklocksignore +func (l *sessionList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *sessionList) afterLoad() {} + +// +checklocksignore +func (l *sessionList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *sessionEntry) StateTypeName() string { + return "pkg/sentry/kernel.sessionEntry" +} + +func (e *sessionEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *sessionEntry) beforeSave() {} + +// +checklocksignore +func (e *sessionEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *sessionEntry) afterLoad() {} + +// +checklocksignore +func (e *sessionEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (r *SessionRefs) StateTypeName() string { + return "pkg/sentry/kernel.SessionRefs" +} + +func (r *SessionRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *SessionRefs) beforeSave() {} + +// +checklocksignore +func (r *SessionRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *SessionRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (s *Session) StateTypeName() string { + return "pkg/sentry/kernel.Session" +} + +func (s *Session) StateFields() []string { + return []string{ + "SessionRefs", + "leader", + "id", + "foreground", + "processGroups", + "sessionEntry", + } +} + +func (s *Session) beforeSave() {} + +// +checklocksignore +func (s *Session) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.SessionRefs) + stateSinkObject.Save(1, &s.leader) + stateSinkObject.Save(2, &s.id) + stateSinkObject.Save(3, &s.foreground) + stateSinkObject.Save(4, &s.processGroups) + stateSinkObject.Save(5, &s.sessionEntry) +} + +func (s *Session) afterLoad() {} + +// +checklocksignore +func (s *Session) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.SessionRefs) + stateSourceObject.Load(1, &s.leader) + stateSourceObject.Load(2, &s.id) + stateSourceObject.Load(3, &s.foreground) + stateSourceObject.Load(4, &s.processGroups) + stateSourceObject.Load(5, &s.sessionEntry) +} + +func (pg *ProcessGroup) StateTypeName() string { + return "pkg/sentry/kernel.ProcessGroup" +} + +func (pg *ProcessGroup) StateFields() []string { + return []string{ + "refs", + "originator", + "id", + "session", + "ancestors", + "processGroupEntry", + } +} + +func (pg *ProcessGroup) beforeSave() {} + +// +checklocksignore +func (pg *ProcessGroup) StateSave(stateSinkObject state.Sink) { + pg.beforeSave() + stateSinkObject.Save(0, &pg.refs) + stateSinkObject.Save(1, &pg.originator) + stateSinkObject.Save(2, &pg.id) + stateSinkObject.Save(3, &pg.session) + stateSinkObject.Save(4, &pg.ancestors) + stateSinkObject.Save(5, &pg.processGroupEntry) +} + +func (pg *ProcessGroup) afterLoad() {} + +// +checklocksignore +func (pg *ProcessGroup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &pg.refs) + stateSourceObject.Load(1, &pg.originator) + stateSourceObject.Load(2, &pg.id) + stateSourceObject.Load(3, &pg.session) + stateSourceObject.Load(4, &pg.ancestors) + stateSourceObject.Load(5, &pg.processGroupEntry) +} + +func (sh *SignalHandlers) StateTypeName() string { + return "pkg/sentry/kernel.SignalHandlers" +} + +func (sh *SignalHandlers) StateFields() []string { + return []string{ + "actions", + } +} + +func (sh *SignalHandlers) beforeSave() {} + +// +checklocksignore +func (sh *SignalHandlers) StateSave(stateSinkObject state.Sink) { + sh.beforeSave() + stateSinkObject.Save(0, &sh.actions) +} + +func (sh *SignalHandlers) afterLoad() {} + +// +checklocksignore +func (sh *SignalHandlers) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sh.actions) +} + +func (l *socketList) StateTypeName() string { + return "pkg/sentry/kernel.socketList" +} + +func (l *socketList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *socketList) beforeSave() {} + +// +checklocksignore +func (l *socketList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *socketList) afterLoad() {} + +// +checklocksignore +func (l *socketList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *socketEntry) StateTypeName() string { + return "pkg/sentry/kernel.socketEntry" +} + +func (e *socketEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *socketEntry) beforeSave() {} + +// +checklocksignore +func (e *socketEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *socketEntry) afterLoad() {} + +// +checklocksignore +func (e *socketEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (s *syscallTableInfo) StateTypeName() string { + return "pkg/sentry/kernel.syscallTableInfo" +} + +func (s *syscallTableInfo) StateFields() []string { + return []string{ + "OS", + "Arch", + } +} + +func (s *syscallTableInfo) beforeSave() {} + +// +checklocksignore +func (s *syscallTableInfo) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.OS) + stateSinkObject.Save(1, &s.Arch) +} + +func (s *syscallTableInfo) afterLoad() {} + +// +checklocksignore +func (s *syscallTableInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.OS) + stateSourceObject.Load(1, &s.Arch) +} + +func (s *syslog) StateTypeName() string { + return "pkg/sentry/kernel.syslog" +} + +func (s *syslog) StateFields() []string { + return []string{ + "msg", + } +} + +func (s *syslog) beforeSave() {} + +// +checklocksignore +func (s *syslog) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.msg) +} + +func (s *syslog) afterLoad() {} + +// +checklocksignore +func (s *syslog) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.msg) +} + +func (t *Task) StateTypeName() string { + return "pkg/sentry/kernel.Task" +} + +func (t *Task) StateFields() []string { + return []string{ + "taskNode", + "runState", + "taskWorkCount", + "taskWork", + "haveSyscallReturn", + "gosched", + "yieldCount", + "pendingSignals", + "signalMask", + "realSignalMask", + "haveSavedSignalMask", + "savedSignalMask", + "signalStack", + "groupStopPending", + "groupStopAcknowledged", + "trapStopPending", + "trapNotifyPending", + "stop", + "exitStatus", + "syscallRestartBlock", + "k", + "containerID", + "image", + "fsContext", + "fdTable", + "vforkParent", + "exitState", + "exitTracerNotified", + "exitTracerAcked", + "exitParentNotified", + "exitParentAcked", + "ptraceTracer", + "ptraceTracees", + "ptraceSeized", + "ptraceOpts", + "ptraceSyscallMode", + "ptraceSinglestep", + "ptraceCode", + "ptraceSiginfo", + "ptraceEventMsg", + "ptraceYAMAExceptionAdded", + "ioUsage", + "creds", + "utsns", + "ipcns", + "abstractSockets", + "mountNamespaceVFS2", + "parentDeathSignal", + "syscallFilters", + "cleartid", + "allowedCPUMask", + "cpu", + "niceness", + "numaPolicy", + "numaNodeMask", + "netns", + "rseqCPU", + "oldRSeqCPUAddr", + "rseqAddr", + "rseqSignature", + "robustList", + "startTime", + "kcov", + "cgroups", + } +} + +func (t *Task) beforeSave() {} + +// +checklocksignore +func (t *Task) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + if !state.IsZeroValue(&t.signalQueue) { + state.Failf("signalQueue is %#v, expected zero", &t.signalQueue) + } + var ptraceTracerValue *Task = t.savePtraceTracer() + stateSinkObject.SaveValue(31, ptraceTracerValue) + var syscallFiltersValue []bpf.Program = t.saveSyscallFilters() + stateSinkObject.SaveValue(48, syscallFiltersValue) + stateSinkObject.Save(0, &t.taskNode) + stateSinkObject.Save(1, &t.runState) + stateSinkObject.Save(2, &t.taskWorkCount) + stateSinkObject.Save(3, &t.taskWork) + stateSinkObject.Save(4, &t.haveSyscallReturn) + stateSinkObject.Save(5, &t.gosched) + stateSinkObject.Save(6, &t.yieldCount) + stateSinkObject.Save(7, &t.pendingSignals) + stateSinkObject.Save(8, &t.signalMask) + stateSinkObject.Save(9, &t.realSignalMask) + stateSinkObject.Save(10, &t.haveSavedSignalMask) + stateSinkObject.Save(11, &t.savedSignalMask) + stateSinkObject.Save(12, &t.signalStack) + stateSinkObject.Save(13, &t.groupStopPending) + stateSinkObject.Save(14, &t.groupStopAcknowledged) + stateSinkObject.Save(15, &t.trapStopPending) + stateSinkObject.Save(16, &t.trapNotifyPending) + stateSinkObject.Save(17, &t.stop) + stateSinkObject.Save(18, &t.exitStatus) + stateSinkObject.Save(19, &t.syscallRestartBlock) + stateSinkObject.Save(20, &t.k) + stateSinkObject.Save(21, &t.containerID) + stateSinkObject.Save(22, &t.image) + stateSinkObject.Save(23, &t.fsContext) + stateSinkObject.Save(24, &t.fdTable) + stateSinkObject.Save(25, &t.vforkParent) + stateSinkObject.Save(26, &t.exitState) + stateSinkObject.Save(27, &t.exitTracerNotified) + stateSinkObject.Save(28, &t.exitTracerAcked) + stateSinkObject.Save(29, &t.exitParentNotified) + stateSinkObject.Save(30, &t.exitParentAcked) + stateSinkObject.Save(32, &t.ptraceTracees) + stateSinkObject.Save(33, &t.ptraceSeized) + stateSinkObject.Save(34, &t.ptraceOpts) + stateSinkObject.Save(35, &t.ptraceSyscallMode) + stateSinkObject.Save(36, &t.ptraceSinglestep) + stateSinkObject.Save(37, &t.ptraceCode) + stateSinkObject.Save(38, &t.ptraceSiginfo) + stateSinkObject.Save(39, &t.ptraceEventMsg) + stateSinkObject.Save(40, &t.ptraceYAMAExceptionAdded) + stateSinkObject.Save(41, &t.ioUsage) + stateSinkObject.Save(42, &t.creds) + stateSinkObject.Save(43, &t.utsns) + stateSinkObject.Save(44, &t.ipcns) + stateSinkObject.Save(45, &t.abstractSockets) + stateSinkObject.Save(46, &t.mountNamespaceVFS2) + stateSinkObject.Save(47, &t.parentDeathSignal) + stateSinkObject.Save(49, &t.cleartid) + stateSinkObject.Save(50, &t.allowedCPUMask) + stateSinkObject.Save(51, &t.cpu) + stateSinkObject.Save(52, &t.niceness) + stateSinkObject.Save(53, &t.numaPolicy) + stateSinkObject.Save(54, &t.numaNodeMask) + stateSinkObject.Save(55, &t.netns) + stateSinkObject.Save(56, &t.rseqCPU) + stateSinkObject.Save(57, &t.oldRSeqCPUAddr) + stateSinkObject.Save(58, &t.rseqAddr) + stateSinkObject.Save(59, &t.rseqSignature) + stateSinkObject.Save(60, &t.robustList) + stateSinkObject.Save(61, &t.startTime) + stateSinkObject.Save(62, &t.kcov) + stateSinkObject.Save(63, &t.cgroups) +} + +// +checklocksignore +func (t *Task) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.taskNode) + stateSourceObject.Load(1, &t.runState) + stateSourceObject.Load(2, &t.taskWorkCount) + stateSourceObject.Load(3, &t.taskWork) + stateSourceObject.Load(4, &t.haveSyscallReturn) + stateSourceObject.Load(5, &t.gosched) + stateSourceObject.Load(6, &t.yieldCount) + stateSourceObject.Load(7, &t.pendingSignals) + stateSourceObject.Load(8, &t.signalMask) + stateSourceObject.Load(9, &t.realSignalMask) + stateSourceObject.Load(10, &t.haveSavedSignalMask) + stateSourceObject.Load(11, &t.savedSignalMask) + stateSourceObject.Load(12, &t.signalStack) + stateSourceObject.Load(13, &t.groupStopPending) + stateSourceObject.Load(14, &t.groupStopAcknowledged) + stateSourceObject.Load(15, &t.trapStopPending) + stateSourceObject.Load(16, &t.trapNotifyPending) + stateSourceObject.Load(17, &t.stop) + stateSourceObject.Load(18, &t.exitStatus) + stateSourceObject.Load(19, &t.syscallRestartBlock) + stateSourceObject.Load(20, &t.k) + stateSourceObject.Load(21, &t.containerID) + stateSourceObject.Load(22, &t.image) + stateSourceObject.Load(23, &t.fsContext) + stateSourceObject.Load(24, &t.fdTable) + stateSourceObject.Load(25, &t.vforkParent) + stateSourceObject.Load(26, &t.exitState) + stateSourceObject.Load(27, &t.exitTracerNotified) + stateSourceObject.Load(28, &t.exitTracerAcked) + stateSourceObject.Load(29, &t.exitParentNotified) + stateSourceObject.Load(30, &t.exitParentAcked) + stateSourceObject.Load(32, &t.ptraceTracees) + stateSourceObject.Load(33, &t.ptraceSeized) + stateSourceObject.Load(34, &t.ptraceOpts) + stateSourceObject.Load(35, &t.ptraceSyscallMode) + stateSourceObject.Load(36, &t.ptraceSinglestep) + stateSourceObject.Load(37, &t.ptraceCode) + stateSourceObject.Load(38, &t.ptraceSiginfo) + stateSourceObject.Load(39, &t.ptraceEventMsg) + stateSourceObject.Load(40, &t.ptraceYAMAExceptionAdded) + stateSourceObject.Load(41, &t.ioUsage) + stateSourceObject.Load(42, &t.creds) + stateSourceObject.Load(43, &t.utsns) + stateSourceObject.Load(44, &t.ipcns) + stateSourceObject.Load(45, &t.abstractSockets) + stateSourceObject.Load(46, &t.mountNamespaceVFS2) + stateSourceObject.Load(47, &t.parentDeathSignal) + stateSourceObject.Load(49, &t.cleartid) + stateSourceObject.Load(50, &t.allowedCPUMask) + stateSourceObject.Load(51, &t.cpu) + stateSourceObject.Load(52, &t.niceness) + stateSourceObject.Load(53, &t.numaPolicy) + stateSourceObject.Load(54, &t.numaNodeMask) + stateSourceObject.Load(55, &t.netns) + stateSourceObject.Load(56, &t.rseqCPU) + stateSourceObject.Load(57, &t.oldRSeqCPUAddr) + stateSourceObject.Load(58, &t.rseqAddr) + stateSourceObject.Load(59, &t.rseqSignature) + stateSourceObject.Load(60, &t.robustList) + stateSourceObject.Load(61, &t.startTime) + stateSourceObject.Load(62, &t.kcov) + stateSourceObject.Load(63, &t.cgroups) + stateSourceObject.LoadValue(31, new(*Task), func(y interface{}) { t.loadPtraceTracer(y.(*Task)) }) + stateSourceObject.LoadValue(48, new([]bpf.Program), func(y interface{}) { t.loadSyscallFilters(y.([]bpf.Program)) }) + stateSourceObject.AfterLoad(t.afterLoad) +} + +func (r *runSyscallAfterPtraceEventClone) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallAfterPtraceEventClone" +} + +func (r *runSyscallAfterPtraceEventClone) StateFields() []string { + return []string{ + "vforkChild", + "vforkChildTID", + } +} + +func (r *runSyscallAfterPtraceEventClone) beforeSave() {} + +// +checklocksignore +func (r *runSyscallAfterPtraceEventClone) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.vforkChild) + stateSinkObject.Save(1, &r.vforkChildTID) +} + +func (r *runSyscallAfterPtraceEventClone) afterLoad() {} + +// +checklocksignore +func (r *runSyscallAfterPtraceEventClone) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.vforkChild) + stateSourceObject.Load(1, &r.vforkChildTID) +} + +func (r *runSyscallAfterVforkStop) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallAfterVforkStop" +} + +func (r *runSyscallAfterVforkStop) StateFields() []string { + return []string{ + "childTID", + } +} + +func (r *runSyscallAfterVforkStop) beforeSave() {} + +// +checklocksignore +func (r *runSyscallAfterVforkStop) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.childTID) +} + +func (r *runSyscallAfterVforkStop) afterLoad() {} + +// +checklocksignore +func (r *runSyscallAfterVforkStop) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.childTID) +} + +func (v *vforkStop) StateTypeName() string { + return "pkg/sentry/kernel.vforkStop" +} + +func (v *vforkStop) StateFields() []string { + return []string{} +} + +func (v *vforkStop) beforeSave() {} + +// +checklocksignore +func (v *vforkStop) StateSave(stateSinkObject state.Sink) { + v.beforeSave() +} + +func (v *vforkStop) afterLoad() {} + +// +checklocksignore +func (v *vforkStop) StateLoad(stateSourceObject state.Source) { +} + +func (e *execStop) StateTypeName() string { + return "pkg/sentry/kernel.execStop" +} + +func (e *execStop) StateFields() []string { + return []string{} +} + +func (e *execStop) beforeSave() {} + +// +checklocksignore +func (e *execStop) StateSave(stateSinkObject state.Sink) { + e.beforeSave() +} + +func (e *execStop) afterLoad() {} + +// +checklocksignore +func (e *execStop) StateLoad(stateSourceObject state.Source) { +} + +func (r *runSyscallAfterExecStop) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallAfterExecStop" +} + +func (r *runSyscallAfterExecStop) StateFields() []string { + return []string{ + "image", + } +} + +func (r *runSyscallAfterExecStop) beforeSave() {} + +// +checklocksignore +func (r *runSyscallAfterExecStop) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.image) +} + +func (r *runSyscallAfterExecStop) afterLoad() {} + +// +checklocksignore +func (r *runSyscallAfterExecStop) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.image) +} + +func (r *runExit) StateTypeName() string { + return "pkg/sentry/kernel.runExit" +} + +func (r *runExit) StateFields() []string { + return []string{} +} + +func (r *runExit) beforeSave() {} + +// +checklocksignore +func (r *runExit) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runExit) afterLoad() {} + +// +checklocksignore +func (r *runExit) StateLoad(stateSourceObject state.Source) { +} + +func (r *runExitMain) StateTypeName() string { + return "pkg/sentry/kernel.runExitMain" +} + +func (r *runExitMain) StateFields() []string { + return []string{} +} + +func (r *runExitMain) beforeSave() {} + +// +checklocksignore +func (r *runExitMain) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runExitMain) afterLoad() {} + +// +checklocksignore +func (r *runExitMain) StateLoad(stateSourceObject state.Source) { +} + +func (r *runExitNotify) StateTypeName() string { + return "pkg/sentry/kernel.runExitNotify" +} + +func (r *runExitNotify) StateFields() []string { + return []string{} +} + +func (r *runExitNotify) beforeSave() {} + +// +checklocksignore +func (r *runExitNotify) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runExitNotify) afterLoad() {} + +// +checklocksignore +func (r *runExitNotify) StateLoad(stateSourceObject state.Source) { +} + +func (image *TaskImage) StateTypeName() string { + return "pkg/sentry/kernel.TaskImage" +} + +func (image *TaskImage) StateFields() []string { + return []string{ + "Name", + "Arch", + "MemoryManager", + "fu", + "st", + } +} + +func (image *TaskImage) beforeSave() {} + +// +checklocksignore +func (image *TaskImage) StateSave(stateSinkObject state.Sink) { + image.beforeSave() + var stValue syscallTableInfo = image.saveSt() + stateSinkObject.SaveValue(4, stValue) + stateSinkObject.Save(0, &image.Name) + stateSinkObject.Save(1, &image.Arch) + stateSinkObject.Save(2, &image.MemoryManager) + stateSinkObject.Save(3, &image.fu) +} + +func (image *TaskImage) afterLoad() {} + +// +checklocksignore +func (image *TaskImage) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &image.Name) + stateSourceObject.Load(1, &image.Arch) + stateSourceObject.Load(2, &image.MemoryManager) + stateSourceObject.Load(3, &image.fu) + stateSourceObject.LoadValue(4, new(syscallTableInfo), func(y interface{}) { image.loadSt(y.(syscallTableInfo)) }) +} + +func (l *taskList) StateTypeName() string { + return "pkg/sentry/kernel.taskList" +} + +func (l *taskList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *taskList) beforeSave() {} + +// +checklocksignore +func (l *taskList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *taskList) afterLoad() {} + +// +checklocksignore +func (l *taskList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *taskEntry) StateTypeName() string { + return "pkg/sentry/kernel.taskEntry" +} + +func (e *taskEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *taskEntry) beforeSave() {} + +// +checklocksignore +func (e *taskEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *taskEntry) afterLoad() {} + +// +checklocksignore +func (e *taskEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (app *runApp) StateTypeName() string { + return "pkg/sentry/kernel.runApp" +} + +func (app *runApp) StateFields() []string { + return []string{} +} + +func (app *runApp) beforeSave() {} + +// +checklocksignore +func (app *runApp) StateSave(stateSinkObject state.Sink) { + app.beforeSave() +} + +func (app *runApp) afterLoad() {} + +// +checklocksignore +func (app *runApp) StateLoad(stateSourceObject state.Source) { +} + +func (ts *TaskGoroutineSchedInfo) StateTypeName() string { + return "pkg/sentry/kernel.TaskGoroutineSchedInfo" +} + +func (ts *TaskGoroutineSchedInfo) StateFields() []string { + return []string{ + "Timestamp", + "State", + "UserTicks", + "SysTicks", + } +} + +func (ts *TaskGoroutineSchedInfo) beforeSave() {} + +// +checklocksignore +func (ts *TaskGoroutineSchedInfo) StateSave(stateSinkObject state.Sink) { + ts.beforeSave() + stateSinkObject.Save(0, &ts.Timestamp) + stateSinkObject.Save(1, &ts.State) + stateSinkObject.Save(2, &ts.UserTicks) + stateSinkObject.Save(3, &ts.SysTicks) +} + +func (ts *TaskGoroutineSchedInfo) afterLoad() {} + +// +checklocksignore +func (ts *TaskGoroutineSchedInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ts.Timestamp) + stateSourceObject.Load(1, &ts.State) + stateSourceObject.Load(2, &ts.UserTicks) + stateSourceObject.Load(3, &ts.SysTicks) +} + +func (tc *taskClock) StateTypeName() string { + return "pkg/sentry/kernel.taskClock" +} + +func (tc *taskClock) StateFields() []string { + return []string{ + "t", + "includeSys", + } +} + +func (tc *taskClock) beforeSave() {} + +// +checklocksignore +func (tc *taskClock) StateSave(stateSinkObject state.Sink) { + tc.beforeSave() + stateSinkObject.Save(0, &tc.t) + stateSinkObject.Save(1, &tc.includeSys) +} + +func (tc *taskClock) afterLoad() {} + +// +checklocksignore +func (tc *taskClock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tc.t) + stateSourceObject.Load(1, &tc.includeSys) +} + +func (tgc *tgClock) StateTypeName() string { + return "pkg/sentry/kernel.tgClock" +} + +func (tgc *tgClock) StateFields() []string { + return []string{ + "tg", + "includeSys", + } +} + +func (tgc *tgClock) beforeSave() {} + +// +checklocksignore +func (tgc *tgClock) StateSave(stateSinkObject state.Sink) { + tgc.beforeSave() + stateSinkObject.Save(0, &tgc.tg) + stateSinkObject.Save(1, &tgc.includeSys) +} + +func (tgc *tgClock) afterLoad() {} + +// +checklocksignore +func (tgc *tgClock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tgc.tg) + stateSourceObject.Load(1, &tgc.includeSys) +} + +func (g *groupStop) StateTypeName() string { + return "pkg/sentry/kernel.groupStop" +} + +func (g *groupStop) StateFields() []string { + return []string{} +} + +func (g *groupStop) beforeSave() {} + +// +checklocksignore +func (g *groupStop) StateSave(stateSinkObject state.Sink) { + g.beforeSave() +} + +func (g *groupStop) afterLoad() {} + +// +checklocksignore +func (g *groupStop) StateLoad(stateSourceObject state.Source) { +} + +func (r *runInterrupt) StateTypeName() string { + return "pkg/sentry/kernel.runInterrupt" +} + +func (r *runInterrupt) StateFields() []string { + return []string{} +} + +func (r *runInterrupt) beforeSave() {} + +// +checklocksignore +func (r *runInterrupt) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runInterrupt) afterLoad() {} + +// +checklocksignore +func (r *runInterrupt) StateLoad(stateSourceObject state.Source) { +} + +func (r *runInterruptAfterSignalDeliveryStop) StateTypeName() string { + return "pkg/sentry/kernel.runInterruptAfterSignalDeliveryStop" +} + +func (r *runInterruptAfterSignalDeliveryStop) StateFields() []string { + return []string{} +} + +func (r *runInterruptAfterSignalDeliveryStop) beforeSave() {} + +// +checklocksignore +func (r *runInterruptAfterSignalDeliveryStop) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runInterruptAfterSignalDeliveryStop) afterLoad() {} + +// +checklocksignore +func (r *runInterruptAfterSignalDeliveryStop) StateLoad(stateSourceObject state.Source) { +} + +func (r *runSyscallAfterSyscallEnterStop) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallAfterSyscallEnterStop" +} + +func (r *runSyscallAfterSyscallEnterStop) StateFields() []string { + return []string{} +} + +func (r *runSyscallAfterSyscallEnterStop) beforeSave() {} + +// +checklocksignore +func (r *runSyscallAfterSyscallEnterStop) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runSyscallAfterSyscallEnterStop) afterLoad() {} + +// +checklocksignore +func (r *runSyscallAfterSyscallEnterStop) StateLoad(stateSourceObject state.Source) { +} + +func (r *runSyscallAfterSysemuStop) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallAfterSysemuStop" +} + +func (r *runSyscallAfterSysemuStop) StateFields() []string { + return []string{} +} + +func (r *runSyscallAfterSysemuStop) beforeSave() {} + +// +checklocksignore +func (r *runSyscallAfterSysemuStop) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runSyscallAfterSysemuStop) afterLoad() {} + +// +checklocksignore +func (r *runSyscallAfterSysemuStop) StateLoad(stateSourceObject state.Source) { +} + +func (r *runSyscallReinvoke) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallReinvoke" +} + +func (r *runSyscallReinvoke) StateFields() []string { + return []string{} +} + +func (r *runSyscallReinvoke) beforeSave() {} + +// +checklocksignore +func (r *runSyscallReinvoke) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runSyscallReinvoke) afterLoad() {} + +// +checklocksignore +func (r *runSyscallReinvoke) StateLoad(stateSourceObject state.Source) { +} + +func (r *runSyscallExit) StateTypeName() string { + return "pkg/sentry/kernel.runSyscallExit" +} + +func (r *runSyscallExit) StateFields() []string { + return []string{} +} + +func (r *runSyscallExit) beforeSave() {} + +// +checklocksignore +func (r *runSyscallExit) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *runSyscallExit) afterLoad() {} + +// +checklocksignore +func (r *runSyscallExit) StateLoad(stateSourceObject state.Source) { +} + +func (tg *ThreadGroup) StateTypeName() string { + return "pkg/sentry/kernel.ThreadGroup" +} + +func (tg *ThreadGroup) StateFields() []string { + return []string{ + "threadGroupNode", + "signalHandlers", + "pendingSignals", + "groupStopDequeued", + "groupStopSignal", + "groupStopPendingCount", + "groupStopComplete", + "groupStopWaitable", + "groupContNotify", + "groupContInterrupted", + "groupContWaitable", + "exiting", + "exitStatus", + "terminationSignal", + "itimerRealTimer", + "itimerVirtSetting", + "itimerProfSetting", + "rlimitCPUSoftSetting", + "cpuTimersEnabled", + "timers", + "nextTimerID", + "exitedCPUStats", + "childCPUStats", + "ioUsage", + "maxRSS", + "childMaxRSS", + "limits", + "processGroup", + "execed", + "oldRSeqCritical", + "mounts", + "tty", + "oomScoreAdj", + } +} + +func (tg *ThreadGroup) beforeSave() {} + +// +checklocksignore +func (tg *ThreadGroup) StateSave(stateSinkObject state.Sink) { + tg.beforeSave() + var oldRSeqCriticalValue *OldRSeqCriticalRegion = tg.saveOldRSeqCritical() + stateSinkObject.SaveValue(29, oldRSeqCriticalValue) + stateSinkObject.Save(0, &tg.threadGroupNode) + stateSinkObject.Save(1, &tg.signalHandlers) + stateSinkObject.Save(2, &tg.pendingSignals) + stateSinkObject.Save(3, &tg.groupStopDequeued) + stateSinkObject.Save(4, &tg.groupStopSignal) + stateSinkObject.Save(5, &tg.groupStopPendingCount) + stateSinkObject.Save(6, &tg.groupStopComplete) + stateSinkObject.Save(7, &tg.groupStopWaitable) + stateSinkObject.Save(8, &tg.groupContNotify) + stateSinkObject.Save(9, &tg.groupContInterrupted) + stateSinkObject.Save(10, &tg.groupContWaitable) + stateSinkObject.Save(11, &tg.exiting) + stateSinkObject.Save(12, &tg.exitStatus) + stateSinkObject.Save(13, &tg.terminationSignal) + stateSinkObject.Save(14, &tg.itimerRealTimer) + stateSinkObject.Save(15, &tg.itimerVirtSetting) + stateSinkObject.Save(16, &tg.itimerProfSetting) + stateSinkObject.Save(17, &tg.rlimitCPUSoftSetting) + stateSinkObject.Save(18, &tg.cpuTimersEnabled) + stateSinkObject.Save(19, &tg.timers) + stateSinkObject.Save(20, &tg.nextTimerID) + stateSinkObject.Save(21, &tg.exitedCPUStats) + stateSinkObject.Save(22, &tg.childCPUStats) + stateSinkObject.Save(23, &tg.ioUsage) + stateSinkObject.Save(24, &tg.maxRSS) + stateSinkObject.Save(25, &tg.childMaxRSS) + stateSinkObject.Save(26, &tg.limits) + stateSinkObject.Save(27, &tg.processGroup) + stateSinkObject.Save(28, &tg.execed) + stateSinkObject.Save(30, &tg.mounts) + stateSinkObject.Save(31, &tg.tty) + stateSinkObject.Save(32, &tg.oomScoreAdj) +} + +func (tg *ThreadGroup) afterLoad() {} + +// +checklocksignore +func (tg *ThreadGroup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tg.threadGroupNode) + stateSourceObject.Load(1, &tg.signalHandlers) + stateSourceObject.Load(2, &tg.pendingSignals) + stateSourceObject.Load(3, &tg.groupStopDequeued) + stateSourceObject.Load(4, &tg.groupStopSignal) + stateSourceObject.Load(5, &tg.groupStopPendingCount) + stateSourceObject.Load(6, &tg.groupStopComplete) + stateSourceObject.Load(7, &tg.groupStopWaitable) + stateSourceObject.Load(8, &tg.groupContNotify) + stateSourceObject.Load(9, &tg.groupContInterrupted) + stateSourceObject.Load(10, &tg.groupContWaitable) + stateSourceObject.Load(11, &tg.exiting) + stateSourceObject.Load(12, &tg.exitStatus) + stateSourceObject.Load(13, &tg.terminationSignal) + stateSourceObject.Load(14, &tg.itimerRealTimer) + stateSourceObject.Load(15, &tg.itimerVirtSetting) + stateSourceObject.Load(16, &tg.itimerProfSetting) + stateSourceObject.Load(17, &tg.rlimitCPUSoftSetting) + stateSourceObject.Load(18, &tg.cpuTimersEnabled) + stateSourceObject.Load(19, &tg.timers) + stateSourceObject.Load(20, &tg.nextTimerID) + stateSourceObject.Load(21, &tg.exitedCPUStats) + stateSourceObject.Load(22, &tg.childCPUStats) + stateSourceObject.Load(23, &tg.ioUsage) + stateSourceObject.Load(24, &tg.maxRSS) + stateSourceObject.Load(25, &tg.childMaxRSS) + stateSourceObject.Load(26, &tg.limits) + stateSourceObject.Load(27, &tg.processGroup) + stateSourceObject.Load(28, &tg.execed) + stateSourceObject.Load(30, &tg.mounts) + stateSourceObject.Load(31, &tg.tty) + stateSourceObject.Load(32, &tg.oomScoreAdj) + stateSourceObject.LoadValue(29, new(*OldRSeqCriticalRegion), func(y interface{}) { tg.loadOldRSeqCritical(y.(*OldRSeqCriticalRegion)) }) +} + +func (l *itimerRealListener) StateTypeName() string { + return "pkg/sentry/kernel.itimerRealListener" +} + +func (l *itimerRealListener) StateFields() []string { + return []string{ + "tg", + } +} + +func (l *itimerRealListener) beforeSave() {} + +// +checklocksignore +func (l *itimerRealListener) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.tg) +} + +func (l *itimerRealListener) afterLoad() {} + +// +checklocksignore +func (l *itimerRealListener) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.tg) +} + +func (ts *TaskSet) StateTypeName() string { + return "pkg/sentry/kernel.TaskSet" +} + +func (ts *TaskSet) StateFields() []string { + return []string{ + "Root", + "sessions", + } +} + +func (ts *TaskSet) beforeSave() {} + +// +checklocksignore +func (ts *TaskSet) StateSave(stateSinkObject state.Sink) { + ts.beforeSave() + stateSinkObject.Save(0, &ts.Root) + stateSinkObject.Save(1, &ts.sessions) +} + +func (ts *TaskSet) afterLoad() {} + +// +checklocksignore +func (ts *TaskSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ts.Root) + stateSourceObject.Load(1, &ts.sessions) +} + +func (ns *PIDNamespace) StateTypeName() string { + return "pkg/sentry/kernel.PIDNamespace" +} + +func (ns *PIDNamespace) StateFields() []string { + return []string{ + "owner", + "parent", + "userns", + "last", + "tasks", + "tids", + "tgids", + "sessions", + "sids", + "processGroups", + "pgids", + "exiting", + } +} + +func (ns *PIDNamespace) beforeSave() {} + +// +checklocksignore +func (ns *PIDNamespace) StateSave(stateSinkObject state.Sink) { + ns.beforeSave() + stateSinkObject.Save(0, &ns.owner) + stateSinkObject.Save(1, &ns.parent) + stateSinkObject.Save(2, &ns.userns) + stateSinkObject.Save(3, &ns.last) + stateSinkObject.Save(4, &ns.tasks) + stateSinkObject.Save(5, &ns.tids) + stateSinkObject.Save(6, &ns.tgids) + stateSinkObject.Save(7, &ns.sessions) + stateSinkObject.Save(8, &ns.sids) + stateSinkObject.Save(9, &ns.processGroups) + stateSinkObject.Save(10, &ns.pgids) + stateSinkObject.Save(11, &ns.exiting) +} + +func (ns *PIDNamespace) afterLoad() {} + +// +checklocksignore +func (ns *PIDNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ns.owner) + stateSourceObject.Load(1, &ns.parent) + stateSourceObject.Load(2, &ns.userns) + stateSourceObject.Load(3, &ns.last) + stateSourceObject.Load(4, &ns.tasks) + stateSourceObject.Load(5, &ns.tids) + stateSourceObject.Load(6, &ns.tgids) + stateSourceObject.Load(7, &ns.sessions) + stateSourceObject.Load(8, &ns.sids) + stateSourceObject.Load(9, &ns.processGroups) + stateSourceObject.Load(10, &ns.pgids) + stateSourceObject.Load(11, &ns.exiting) +} + +func (t *threadGroupNode) StateTypeName() string { + return "pkg/sentry/kernel.threadGroupNode" +} + +func (t *threadGroupNode) StateFields() []string { + return []string{ + "pidns", + "leader", + "execing", + "tasks", + "tasksCount", + "liveTasks", + "activeTasks", + } +} + +func (t *threadGroupNode) beforeSave() {} + +// +checklocksignore +func (t *threadGroupNode) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.pidns) + stateSinkObject.Save(1, &t.leader) + stateSinkObject.Save(2, &t.execing) + stateSinkObject.Save(3, &t.tasks) + stateSinkObject.Save(4, &t.tasksCount) + stateSinkObject.Save(5, &t.liveTasks) + stateSinkObject.Save(6, &t.activeTasks) +} + +func (t *threadGroupNode) afterLoad() {} + +// +checklocksignore +func (t *threadGroupNode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.pidns) + stateSourceObject.Load(1, &t.leader) + stateSourceObject.Load(2, &t.execing) + stateSourceObject.Load(3, &t.tasks) + stateSourceObject.Load(4, &t.tasksCount) + stateSourceObject.Load(5, &t.liveTasks) + stateSourceObject.Load(6, &t.activeTasks) +} + +func (t *taskNode) StateTypeName() string { + return "pkg/sentry/kernel.taskNode" +} + +func (t *taskNode) StateFields() []string { + return []string{ + "tg", + "taskEntry", + "parent", + "children", + "childPIDNamespace", + } +} + +func (t *taskNode) beforeSave() {} + +// +checklocksignore +func (t *taskNode) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.tg) + stateSinkObject.Save(1, &t.taskEntry) + stateSinkObject.Save(2, &t.parent) + stateSinkObject.Save(3, &t.children) + stateSinkObject.Save(4, &t.childPIDNamespace) +} + +func (t *taskNode) afterLoad() {} + +// +checklocksignore +func (t *taskNode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &t.tg) + stateSourceObject.Load(1, &t.taskEntry) + stateSourceObject.Load(2, &t.parent) + stateSourceObject.Load(3, &t.children) + stateSourceObject.Load(4, &t.childPIDNamespace) +} + +func (t *Timekeeper) StateTypeName() string { + return "pkg/sentry/kernel.Timekeeper" +} + +func (t *Timekeeper) StateFields() []string { + return []string{ + "realtimeClock", + "monotonicClock", + "bootTime", + "saveMonotonic", + "saveRealtime", + "params", + } +} + +// +checklocksignore +func (t *Timekeeper) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.realtimeClock) + stateSinkObject.Save(1, &t.monotonicClock) + stateSinkObject.Save(2, &t.bootTime) + stateSinkObject.Save(3, &t.saveMonotonic) + stateSinkObject.Save(4, &t.saveRealtime) + stateSinkObject.Save(5, &t.params) +} + +// +checklocksignore +func (t *Timekeeper) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.realtimeClock) + stateSourceObject.Load(1, &t.monotonicClock) + stateSourceObject.Load(2, &t.bootTime) + stateSourceObject.Load(3, &t.saveMonotonic) + stateSourceObject.Load(4, &t.saveRealtime) + stateSourceObject.Load(5, &t.params) + stateSourceObject.AfterLoad(t.afterLoad) +} + +func (tc *timekeeperClock) StateTypeName() string { + return "pkg/sentry/kernel.timekeeperClock" +} + +func (tc *timekeeperClock) StateFields() []string { + return []string{ + "tk", + "c", + } +} + +func (tc *timekeeperClock) beforeSave() {} + +// +checklocksignore +func (tc *timekeeperClock) StateSave(stateSinkObject state.Sink) { + tc.beforeSave() + stateSinkObject.Save(0, &tc.tk) + stateSinkObject.Save(1, &tc.c) +} + +func (tc *timekeeperClock) afterLoad() {} + +// +checklocksignore +func (tc *timekeeperClock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tc.tk) + stateSourceObject.Load(1, &tc.c) +} + +func (t *TTY) StateTypeName() string { + return "pkg/sentry/kernel.TTY" +} + +func (t *TTY) StateFields() []string { + return []string{ + "Index", + "tg", + } +} + +func (t *TTY) beforeSave() {} + +// +checklocksignore +func (t *TTY) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.Index) + stateSinkObject.Save(1, &t.tg) +} + +func (t *TTY) afterLoad() {} + +// +checklocksignore +func (t *TTY) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.Index) + stateSourceObject.Load(1, &t.tg) +} + +func (u *UTSNamespace) StateTypeName() string { + return "pkg/sentry/kernel.UTSNamespace" +} + +func (u *UTSNamespace) StateFields() []string { + return []string{ + "hostName", + "domainName", + "userns", + } +} + +func (u *UTSNamespace) beforeSave() {} + +// +checklocksignore +func (u *UTSNamespace) StateSave(stateSinkObject state.Sink) { + u.beforeSave() + stateSinkObject.Save(0, &u.hostName) + stateSinkObject.Save(1, &u.domainName) + stateSinkObject.Save(2, &u.userns) +} + +func (u *UTSNamespace) afterLoad() {} + +// +checklocksignore +func (u *UTSNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &u.hostName) + stateSourceObject.Load(1, &u.domainName) + stateSourceObject.Load(2, &u.userns) +} + +func (v *VDSOParamPage) StateTypeName() string { + return "pkg/sentry/kernel.VDSOParamPage" +} + +func (v *VDSOParamPage) StateFields() []string { + return []string{ + "mfp", + "fr", + "seq", + "copyScratchBuffer", + } +} + +func (v *VDSOParamPage) beforeSave() {} + +// +checklocksignore +func (v *VDSOParamPage) StateSave(stateSinkObject state.Sink) { + v.beforeSave() + stateSinkObject.Save(0, &v.mfp) + stateSinkObject.Save(1, &v.fr) + stateSinkObject.Save(2, &v.seq) + stateSinkObject.Save(3, &v.copyScratchBuffer) +} + +func (v *VDSOParamPage) afterLoad() {} + +// +checklocksignore +func (v *VDSOParamPage) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &v.mfp) + stateSourceObject.Load(1, &v.fr) + stateSourceObject.Load(2, &v.seq) + stateSourceObject.Load(3, &v.copyScratchBuffer) +} + +func init() { + state.Register((*abstractEndpoint)(nil)) + state.Register((*AbstractSocketNamespace)(nil)) + state.Register((*Cgroup)(nil)) + state.Register((*hierarchy)(nil)) + state.Register((*CgroupRegistry)(nil)) + state.Register((*FDFlags)(nil)) + state.Register((*descriptor)(nil)) + state.Register((*FDTable)(nil)) + state.Register((*FDTableRefs)(nil)) + state.Register((*FSContext)(nil)) + state.Register((*FSContextRefs)(nil)) + state.Register((*IPCNamespace)(nil)) + state.Register((*IPCNamespaceRefs)(nil)) + state.Register((*Kernel)(nil)) + state.Register((*SocketRecord)(nil)) + state.Register((*SocketRecordVFS1)(nil)) + state.Register((*pendingSignals)(nil)) + state.Register((*pendingSignalQueue)(nil)) + state.Register((*pendingSignal)(nil)) + state.Register((*pendingSignalList)(nil)) + state.Register((*pendingSignalEntry)(nil)) + state.Register((*savedPendingSignal)(nil)) + state.Register((*IntervalTimer)(nil)) + state.Register((*processGroupList)(nil)) + state.Register((*processGroupEntry)(nil)) + state.Register((*ProcessGroupRefs)(nil)) + state.Register((*ptraceOptions)(nil)) + state.Register((*ptraceStop)(nil)) + state.Register((*OldRSeqCriticalRegion)(nil)) + state.Register((*sessionList)(nil)) + state.Register((*sessionEntry)(nil)) + state.Register((*SessionRefs)(nil)) + state.Register((*Session)(nil)) + state.Register((*ProcessGroup)(nil)) + state.Register((*SignalHandlers)(nil)) + state.Register((*socketList)(nil)) + state.Register((*socketEntry)(nil)) + state.Register((*syscallTableInfo)(nil)) + state.Register((*syslog)(nil)) + state.Register((*Task)(nil)) + state.Register((*runSyscallAfterPtraceEventClone)(nil)) + state.Register((*runSyscallAfterVforkStop)(nil)) + state.Register((*vforkStop)(nil)) + state.Register((*execStop)(nil)) + state.Register((*runSyscallAfterExecStop)(nil)) + state.Register((*runExit)(nil)) + state.Register((*runExitMain)(nil)) + state.Register((*runExitNotify)(nil)) + state.Register((*TaskImage)(nil)) + state.Register((*taskList)(nil)) + state.Register((*taskEntry)(nil)) + state.Register((*runApp)(nil)) + state.Register((*TaskGoroutineSchedInfo)(nil)) + state.Register((*taskClock)(nil)) + state.Register((*tgClock)(nil)) + state.Register((*groupStop)(nil)) + state.Register((*runInterrupt)(nil)) + state.Register((*runInterruptAfterSignalDeliveryStop)(nil)) + state.Register((*runSyscallAfterSyscallEnterStop)(nil)) + state.Register((*runSyscallAfterSysemuStop)(nil)) + state.Register((*runSyscallReinvoke)(nil)) + state.Register((*runSyscallExit)(nil)) + state.Register((*ThreadGroup)(nil)) + state.Register((*itimerRealListener)(nil)) + state.Register((*TaskSet)(nil)) + state.Register((*PIDNamespace)(nil)) + state.Register((*threadGroupNode)(nil)) + state.Register((*taskNode)(nil)) + state.Register((*Timekeeper)(nil)) + state.Register((*timekeeperClock)(nil)) + state.Register((*TTY)(nil)) + state.Register((*UTSNamespace)(nil)) + state.Register((*VDSOParamPage)(nil)) +} diff --git a/pkg/sentry/kernel/kernel_unsafe_abi_autogen_unsafe.go b/pkg/sentry/kernel/kernel_unsafe_abi_autogen_unsafe.go new file mode 100644 index 000000000..5d810c89c --- /dev/null +++ b/pkg/sentry/kernel/kernel_unsafe_abi_autogen_unsafe.go @@ -0,0 +1,7 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package kernel + +import ( +) + diff --git a/pkg/sentry/kernel/kernel_unsafe_state_autogen.go b/pkg/sentry/kernel/kernel_unsafe_state_autogen.go new file mode 100644 index 000000000..12130bf74 --- /dev/null +++ b/pkg/sentry/kernel/kernel_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package kernel diff --git a/pkg/sentry/kernel/memevent/BUILD b/pkg/sentry/kernel/memevent/BUILD deleted file mode 100644 index 4486848d2..000000000 --- a/pkg/sentry/kernel/memevent/BUILD +++ /dev/null @@ -1,24 +0,0 @@ -load("//tools:defs.bzl", "go_library", "proto_library") - -package(licenses = ["notice"]) - -go_library( - name = "memevent", - srcs = ["memory_events.go"], - visibility = ["//:sandbox"], - deps = [ - ":memory_events_go_proto", - "//pkg/eventchannel", - "//pkg/log", - "//pkg/metric", - "//pkg/sentry/kernel", - "//pkg/sentry/usage", - "//pkg/sync", - ], -) - -proto_library( - name = "memory_events", - srcs = ["memory_events.proto"], - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/kernel/memevent/memevent_state_autogen.go b/pkg/sentry/kernel/memevent/memevent_state_autogen.go new file mode 100644 index 000000000..4a1679fa9 --- /dev/null +++ b/pkg/sentry/kernel/memevent/memevent_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package memevent diff --git a/pkg/sentry/kernel/memevent/memory_events.proto b/pkg/sentry/kernel/memevent/memory_events.proto deleted file mode 100644 index bf8029ff5..000000000 --- a/pkg/sentry/kernel/memevent/memory_events.proto +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -// MemoryUsageEvent describes the memory usage of the sandbox at a single -// instant in time. These messages are emitted periodically on the eventchannel. -message MemoryUsageEvent { - // The total memory usage of the sandboxed application in bytes, calculated - // using the 'fast' method. - uint64 total = 1; - - // Memory used to back memory-mapped regions for files in the application, in - // bytes. This corresponds to the usage.MemoryKind.Mapped memory type. - uint64 mapped = 2; -} diff --git a/pkg/sentry/kernel/memevent/memory_events_go_proto/memory_events.pb.go b/pkg/sentry/kernel/memevent/memory_events_go_proto/memory_events.pb.go new file mode 100644 index 000000000..ed1b8a8ca --- /dev/null +++ b/pkg/sentry/kernel/memevent/memory_events_go_proto/memory_events.pb.go @@ -0,0 +1,158 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.25.0 +// protoc v3.13.0 +// source: pkg/sentry/kernel/memevent/memory_events.proto + +package gvisor + +import ( + proto "github.com/golang/protobuf/proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion that a sufficiently up-to-date version +// of the legacy proto package is being used. +const _ = proto.ProtoPackageIsVersion4 + +type MemoryUsageEvent struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Total uint64 `protobuf:"varint,1,opt,name=total,proto3" json:"total,omitempty"` + Mapped uint64 `protobuf:"varint,2,opt,name=mapped,proto3" json:"mapped,omitempty"` +} + +func (x *MemoryUsageEvent) Reset() { + *x = MemoryUsageEvent{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_kernel_memevent_memory_events_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *MemoryUsageEvent) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*MemoryUsageEvent) ProtoMessage() {} + +func (x *MemoryUsageEvent) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_kernel_memevent_memory_events_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use MemoryUsageEvent.ProtoReflect.Descriptor instead. +func (*MemoryUsageEvent) Descriptor() ([]byte, []int) { + return file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescGZIP(), []int{0} +} + +func (x *MemoryUsageEvent) GetTotal() uint64 { + if x != nil { + return x.Total + } + return 0 +} + +func (x *MemoryUsageEvent) GetMapped() uint64 { + if x != nil { + return x.Mapped + } + return 0 +} + +var File_pkg_sentry_kernel_memevent_memory_events_proto protoreflect.FileDescriptor + +var file_pkg_sentry_kernel_memevent_memory_events_proto_rawDesc = []byte{ + 0x0a, 0x2e, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x2f, 0x6b, 0x65, 0x72, + 0x6e, 0x65, 0x6c, 0x2f, 0x6d, 0x65, 0x6d, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x2f, 0x6d, 0x65, 0x6d, + 0x6f, 0x72, 0x79, 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x12, 0x06, 0x67, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x22, 0x40, 0x0a, 0x10, 0x4d, 0x65, 0x6d, 0x6f, + 0x72, 0x79, 0x55, 0x73, 0x61, 0x67, 0x65, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x12, 0x14, 0x0a, 0x05, + 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, + 0x61, 0x6c, 0x12, 0x16, 0x0a, 0x06, 0x6d, 0x61, 0x70, 0x70, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x06, 0x6d, 0x61, 0x70, 0x70, 0x65, 0x64, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, +} + +var ( + file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescOnce sync.Once + file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescData = file_pkg_sentry_kernel_memevent_memory_events_proto_rawDesc +) + +func file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescGZIP() []byte { + file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescOnce.Do(func() { + file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescData) + }) + return file_pkg_sentry_kernel_memevent_memory_events_proto_rawDescData +} + +var file_pkg_sentry_kernel_memevent_memory_events_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_pkg_sentry_kernel_memevent_memory_events_proto_goTypes = []interface{}{ + (*MemoryUsageEvent)(nil), // 0: gvisor.MemoryUsageEvent +} +var file_pkg_sentry_kernel_memevent_memory_events_proto_depIdxs = []int32{ + 0, // [0:0] is the sub-list for method output_type + 0, // [0:0] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_pkg_sentry_kernel_memevent_memory_events_proto_init() } +func file_pkg_sentry_kernel_memevent_memory_events_proto_init() { + if File_pkg_sentry_kernel_memevent_memory_events_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_pkg_sentry_kernel_memevent_memory_events_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*MemoryUsageEvent); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pkg_sentry_kernel_memevent_memory_events_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_pkg_sentry_kernel_memevent_memory_events_proto_goTypes, + DependencyIndexes: file_pkg_sentry_kernel_memevent_memory_events_proto_depIdxs, + MessageInfos: file_pkg_sentry_kernel_memevent_memory_events_proto_msgTypes, + }.Build() + File_pkg_sentry_kernel_memevent_memory_events_proto = out.File + file_pkg_sentry_kernel_memevent_memory_events_proto_rawDesc = nil + file_pkg_sentry_kernel_memevent_memory_events_proto_goTypes = nil + file_pkg_sentry_kernel_memevent_memory_events_proto_depIdxs = nil +} diff --git a/pkg/sentry/kernel/msgqueue/BUILD b/pkg/sentry/kernel/msgqueue/BUILD deleted file mode 100644 index 5ec11e1f6..000000000 --- a/pkg/sentry/kernel/msgqueue/BUILD +++ /dev/null @@ -1,36 +0,0 @@ -load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "message_list", - out = "message_list.go", - package = "msgqueue", - prefix = "msg", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Message", - "Linker": "*Message", - }, -) - -go_library( - name = "msgqueue", - srcs = [ - "message_list.go", - "msgqueue.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/ipc", - "//pkg/sentry/kernel/time", - "//pkg/sync", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/msgqueue/message_list.go b/pkg/sentry/kernel/msgqueue/message_list.go new file mode 100644 index 000000000..f2f2292e7 --- /dev/null +++ b/pkg/sentry/kernel/msgqueue/message_list.go @@ -0,0 +1,221 @@ +package msgqueue + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type msgElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (msgElementMapper) linkerFor(elem *Message) *Message { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type msgList struct { + head *Message + tail *Message +} + +// Reset resets list l to the empty state. +func (l *msgList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *msgList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *msgList) Front() *Message { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *msgList) Back() *Message { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *msgList) Len() (count int) { + for e := l.Front(); e != nil; e = (msgElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *msgList) PushFront(e *Message) { + linker := msgElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + msgElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *msgList) PushBack(e *Message) { + linker := msgElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + msgElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *msgList) PushBackList(m *msgList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + msgElementMapper{}.linkerFor(l.tail).SetNext(m.head) + msgElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *msgList) InsertAfter(b, e *Message) { + bLinker := msgElementMapper{}.linkerFor(b) + eLinker := msgElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + msgElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *msgList) InsertBefore(a, e *Message) { + aLinker := msgElementMapper{}.linkerFor(a) + eLinker := msgElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + msgElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *msgList) Remove(e *Message) { + linker := msgElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + msgElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + msgElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type msgEntry struct { + next *Message + prev *Message +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *msgEntry) Next() *Message { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *msgEntry) Prev() *Message { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *msgEntry) SetNext(elem *Message) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *msgEntry) SetPrev(elem *Message) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/msgqueue/msgqueue_state_autogen.go b/pkg/sentry/kernel/msgqueue/msgqueue_state_autogen.go new file mode 100644 index 000000000..3dfcd09cb --- /dev/null +++ b/pkg/sentry/kernel/msgqueue/msgqueue_state_autogen.go @@ -0,0 +1,194 @@ +// automatically generated by stateify. + +package msgqueue + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (l *msgList) StateTypeName() string { + return "pkg/sentry/kernel/msgqueue.msgList" +} + +func (l *msgList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *msgList) beforeSave() {} + +// +checklocksignore +func (l *msgList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *msgList) afterLoad() {} + +// +checklocksignore +func (l *msgList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *msgEntry) StateTypeName() string { + return "pkg/sentry/kernel/msgqueue.msgEntry" +} + +func (e *msgEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *msgEntry) beforeSave() {} + +// +checklocksignore +func (e *msgEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *msgEntry) afterLoad() {} + +// +checklocksignore +func (e *msgEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (r *Registry) StateTypeName() string { + return "pkg/sentry/kernel/msgqueue.Registry" +} + +func (r *Registry) StateFields() []string { + return []string{ + "reg", + } +} + +func (r *Registry) beforeSave() {} + +// +checklocksignore +func (r *Registry) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.reg) +} + +func (r *Registry) afterLoad() {} + +// +checklocksignore +func (r *Registry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.reg) +} + +func (q *Queue) StateTypeName() string { + return "pkg/sentry/kernel/msgqueue.Queue" +} + +func (q *Queue) StateFields() []string { + return []string{ + "registry", + "dead", + "obj", + "senders", + "receivers", + "messages", + "sendTime", + "receiveTime", + "changeTime", + "byteCount", + "messageCount", + "maxBytes", + "sendPID", + "receivePID", + } +} + +func (q *Queue) beforeSave() {} + +// +checklocksignore +func (q *Queue) StateSave(stateSinkObject state.Sink) { + q.beforeSave() + stateSinkObject.Save(0, &q.registry) + stateSinkObject.Save(1, &q.dead) + stateSinkObject.Save(2, &q.obj) + stateSinkObject.Save(3, &q.senders) + stateSinkObject.Save(4, &q.receivers) + stateSinkObject.Save(5, &q.messages) + stateSinkObject.Save(6, &q.sendTime) + stateSinkObject.Save(7, &q.receiveTime) + stateSinkObject.Save(8, &q.changeTime) + stateSinkObject.Save(9, &q.byteCount) + stateSinkObject.Save(10, &q.messageCount) + stateSinkObject.Save(11, &q.maxBytes) + stateSinkObject.Save(12, &q.sendPID) + stateSinkObject.Save(13, &q.receivePID) +} + +func (q *Queue) afterLoad() {} + +// +checklocksignore +func (q *Queue) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &q.registry) + stateSourceObject.Load(1, &q.dead) + stateSourceObject.Load(2, &q.obj) + stateSourceObject.Load(3, &q.senders) + stateSourceObject.Load(4, &q.receivers) + stateSourceObject.Load(5, &q.messages) + stateSourceObject.Load(6, &q.sendTime) + stateSourceObject.Load(7, &q.receiveTime) + stateSourceObject.Load(8, &q.changeTime) + stateSourceObject.Load(9, &q.byteCount) + stateSourceObject.Load(10, &q.messageCount) + stateSourceObject.Load(11, &q.maxBytes) + stateSourceObject.Load(12, &q.sendPID) + stateSourceObject.Load(13, &q.receivePID) +} + +func (m *Message) StateTypeName() string { + return "pkg/sentry/kernel/msgqueue.Message" +} + +func (m *Message) StateFields() []string { + return []string{ + "msgEntry", + "mType", + "mText", + "mSize", + } +} + +func (m *Message) beforeSave() {} + +// +checklocksignore +func (m *Message) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.msgEntry) + stateSinkObject.Save(1, &m.mType) + stateSinkObject.Save(2, &m.mText) + stateSinkObject.Save(3, &m.mSize) +} + +func (m *Message) afterLoad() {} + +// +checklocksignore +func (m *Message) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.msgEntry) + stateSourceObject.Load(1, &m.mType) + stateSourceObject.Load(2, &m.mText) + stateSourceObject.Load(3, &m.mSize) +} + +func init() { + state.Register((*msgList)(nil)) + state.Register((*msgEntry)(nil)) + state.Register((*Registry)(nil)) + state.Register((*Queue)(nil)) + state.Register((*Message)(nil)) +} diff --git a/pkg/sentry/kernel/pending_signals_list.go b/pkg/sentry/kernel/pending_signals_list.go new file mode 100644 index 000000000..d5b483b3e --- /dev/null +++ b/pkg/sentry/kernel/pending_signals_list.go @@ -0,0 +1,221 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type pendingSignalElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (pendingSignalElementMapper) linkerFor(elem *pendingSignal) *pendingSignal { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type pendingSignalList struct { + head *pendingSignal + tail *pendingSignal +} + +// Reset resets list l to the empty state. +func (l *pendingSignalList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *pendingSignalList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *pendingSignalList) Front() *pendingSignal { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *pendingSignalList) Back() *pendingSignal { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *pendingSignalList) Len() (count int) { + for e := l.Front(); e != nil; e = (pendingSignalElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *pendingSignalList) PushFront(e *pendingSignal) { + linker := pendingSignalElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + pendingSignalElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *pendingSignalList) PushBack(e *pendingSignal) { + linker := pendingSignalElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + pendingSignalElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *pendingSignalList) PushBackList(m *pendingSignalList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + pendingSignalElementMapper{}.linkerFor(l.tail).SetNext(m.head) + pendingSignalElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *pendingSignalList) InsertAfter(b, e *pendingSignal) { + bLinker := pendingSignalElementMapper{}.linkerFor(b) + eLinker := pendingSignalElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + pendingSignalElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *pendingSignalList) InsertBefore(a, e *pendingSignal) { + aLinker := pendingSignalElementMapper{}.linkerFor(a) + eLinker := pendingSignalElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + pendingSignalElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *pendingSignalList) Remove(e *pendingSignal) { + linker := pendingSignalElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + pendingSignalElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + pendingSignalElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type pendingSignalEntry struct { + next *pendingSignal + prev *pendingSignal +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *pendingSignalEntry) Next() *pendingSignal { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *pendingSignalEntry) Prev() *pendingSignal { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *pendingSignalEntry) SetNext(elem *pendingSignal) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *pendingSignalEntry) SetPrev(elem *pendingSignal) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD deleted file mode 100644 index 94ebac7c5..000000000 --- a/pkg/sentry/kernel/pipe/BUILD +++ /dev/null @@ -1,58 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "pipe", - srcs = [ - "device.go", - "node.go", - "pipe.go", - "pipe_unsafe.go", - "pipe_util.go", - "reader.go", - "reader_writer.go", - "save_restore.go", - "vfs.go", - "writer.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/amutex", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/marshal/primitive", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "pipe_test", - size = "small", - srcs = [ - "node_test.go", - "pipe_test.go", - ], - library = ":pipe", - deps = [ - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go deleted file mode 100644 index d25cf658e..000000000 --- a/pkg/sentry/kernel/pipe/node_test.go +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pipe - -import ( - "testing" - "time" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" -) - -type sleeper struct { - context.Context - ch chan struct{} -} - -func newSleeperContext(t *testing.T) context.Context { - return &sleeper{ - Context: contexttest.Context(t), - ch: make(chan struct{}), - } -} - -func (s *sleeper) SleepStart() <-chan struct{} { - return s.ch -} - -func (s *sleeper) SleepFinish(bool) { -} - -func (s *sleeper) Cancel() { - s.ch <- struct{}{} -} - -func (s *sleeper) Interrupted() bool { - return len(s.ch) != 0 -} - -type openResult struct { - *fs.File - error -} - -var perms fs.FilePermissions = fs.FilePermissions{ - User: fs.PermMask{Read: true, Write: true}, -} - -func testOpenOrDie(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, doneChan chan<- struct{}) (*fs.File, error) { - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - d := fs.NewDirent(ctx, inode, "pipe") - file, err := n.GetFile(ctx, d, flags) - if err != nil { - t.Errorf("open with flags %+v failed: %v", flags, err) - return nil, err - } - if doneChan != nil { - doneChan <- struct{}{} - } - return file, err -} - -func testOpen(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, resChan chan<- openResult) (*fs.File, error) { - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - d := fs.NewDirent(ctx, inode, "pipe") - file, err := n.GetFile(ctx, d, flags) - if resChan != nil { - resChan <- openResult{file, err} - } - return file, err -} - -func newNamedPipe(t *testing.T) *Pipe { - return NewPipe(true, DefaultPipeSize) -} - -func newAnonPipe(t *testing.T) *Pipe { - return NewPipe(false, DefaultPipeSize) -} - -// assertRecvBlocks ensures that a recv attempt on c blocks for at least -// blockDuration. This is useful for checking that a goroutine that is supposed -// to be executing a blocking operation is actually blocking. -func assertRecvBlocks(t *testing.T, c <-chan struct{}, blockDuration time.Duration, failMsg string) { - select { - case <-c: - t.Fatalf(failMsg) - case <-time.After(blockDuration): - // Ok, blocked for the required duration. - } -} - -func TestReadOpenBlocksForWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - // Verify that the open for read is blocking. - assertRecvBlocks(t, rDone, time.Millisecond*100, - "open for read not blocking with no writers") - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - <-wDone - <-rDone -} - -func TestWriteOpenBlocksForReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - // Verify that the open for write is blocking - assertRecvBlocks(t, wDone, time.Millisecond*100, - "open for write not blocking with no readers") - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - <-rDone - <-wDone -} - -func TestMultipleWriteOpenDoesntCountAsReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone1 := make(chan struct{}) - rDone2 := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone1) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone2) - - assertRecvBlocks(t, rDone1, time.Millisecond*100, - "open for read didn't block with no writers") - assertRecvBlocks(t, rDone2, time.Millisecond*100, - "open for read didn't block with no writers") - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - <-wDone - <-rDone2 - <-rDone1 -} - -func TestClosedReaderBlocksWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rFile, _ := testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, NonBlocking: true}, nil) - rFile.DecRef(ctx) - - wDone := make(chan struct{}) - // This open for write should block because the reader is now gone. - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - assertRecvBlocks(t, wDone, time.Millisecond*100, - "open for write didn't block with no concurrent readers") - - // Open for read again. This should unblock the open for write. - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - <-rDone - <-wDone -} - -func TestReadWriteOpenNeverBlocks(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rwDone := make(chan struct{}) - // Open for read-write never wait for a reader or writer, even if the - // nonblocking flag is not set. - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, Write: true, NonBlocking: false}, rwDone) - <-rwDone -} - -func TestReadWriteOpenUnblocksReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - rwDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, Write: true}, rwDone) - - <-rwDone - <-rDone -} - -func TestReadWriteOpenUnblocksWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - rwDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, Write: true}, rwDone) - - <-rwDone - <-wDone -} - -func TestBlockedOpenIsCancellable(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - done := make(chan openResult) - go testOpen(ctx, t, f, fs.FileFlags{Read: true}, done) - select { - case <-done: - t.Fatalf("open for read didn't block with no writers") - case <-time.After(time.Millisecond * 100): - // Ok. - } - - ctx.(*sleeper).Cancel() - // If the cancel on the sleeper didn't work, the open for read would never - // return. - res := <-done - if res.error != syserror.ErrInterrupted { - t.Fatalf("Cancellation didn't cause GetFile to return fs.ErrInterrupted, got %v.", - res.error) - } -} - -func TestNonblockingReadOpenFileNoWriters(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Read: true, NonBlocking: true}, nil); err != nil { - t.Fatalf("Nonblocking open for read failed with error %v.", err) - } -} - -func TestNonblockingWriteOpenFileNoReaders(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true, NonBlocking: true}, nil); !linuxerr.Equals(linuxerr.ENXIO, err) { - t.Fatalf("Nonblocking open for write failed unexpected error %v.", err) - } -} - -func TestNonBlockingReadOpenWithWriter(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - // Open for write blocks since there are no readers yet. - assertRecvBlocks(t, wDone, time.Millisecond*100, - "Open for write didn't block with no reader.") - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Read: true, NonBlocking: true}, nil); err != nil { - t.Fatalf("Nonblocking open for read failed with error %v.", err) - } - - // Open for write should now be unblocked. - <-wDone -} - -func TestNonBlockingWriteOpenWithReader(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - // Open for write blocked, since no reader yet. - assertRecvBlocks(t, rDone, time.Millisecond*100, - "Open for reader didn't block with no writer.") - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true, NonBlocking: true}, nil); err != nil { - t.Fatalf("Nonblocking open for write failed with error %v.", err) - } - - // Open for write should now be unblocked. - <-rDone -} - -func TestAnonReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newAnonPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Read: true}, nil); err != nil { - t.Fatalf("open anon pipe for read failed: %v", err) - } -} - -func TestAnonWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newAnonPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true}, nil); err != nil { - t.Fatalf("open anon pipe for write failed: %v", err) - } -} diff --git a/pkg/sentry/kernel/pipe/pipe_state_autogen.go b/pkg/sentry/kernel/pipe/pipe_state_autogen.go new file mode 100644 index 000000000..57451e951 --- /dev/null +++ b/pkg/sentry/kernel/pipe/pipe_state_autogen.go @@ -0,0 +1,227 @@ +// automatically generated by stateify. + +package pipe + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (i *inodeOperations) StateTypeName() string { + return "pkg/sentry/kernel/pipe.inodeOperations" +} + +func (i *inodeOperations) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "p", + } +} + +func (i *inodeOperations) beforeSave() {} + +// +checklocksignore +func (i *inodeOperations) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeSimpleAttributes) + stateSinkObject.Save(1, &i.p) +} + +func (i *inodeOperations) afterLoad() {} + +// +checklocksignore +func (i *inodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeSimpleAttributes) + stateSourceObject.Load(1, &i.p) +} + +func (p *Pipe) StateTypeName() string { + return "pkg/sentry/kernel/pipe.Pipe" +} + +func (p *Pipe) StateFields() []string { + return []string{ + "isNamed", + "readers", + "writers", + "buf", + "off", + "size", + "max", + "hadWriter", + } +} + +func (p *Pipe) beforeSave() {} + +// +checklocksignore +func (p *Pipe) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.isNamed) + stateSinkObject.Save(1, &p.readers) + stateSinkObject.Save(2, &p.writers) + stateSinkObject.Save(3, &p.buf) + stateSinkObject.Save(4, &p.off) + stateSinkObject.Save(5, &p.size) + stateSinkObject.Save(6, &p.max) + stateSinkObject.Save(7, &p.hadWriter) +} + +// +checklocksignore +func (p *Pipe) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.isNamed) + stateSourceObject.Load(1, &p.readers) + stateSourceObject.Load(2, &p.writers) + stateSourceObject.Load(3, &p.buf) + stateSourceObject.Load(4, &p.off) + stateSourceObject.Load(5, &p.size) + stateSourceObject.Load(6, &p.max) + stateSourceObject.Load(7, &p.hadWriter) + stateSourceObject.AfterLoad(p.afterLoad) +} + +func (r *Reader) StateTypeName() string { + return "pkg/sentry/kernel/pipe.Reader" +} + +func (r *Reader) StateFields() []string { + return []string{ + "ReaderWriter", + } +} + +func (r *Reader) beforeSave() {} + +// +checklocksignore +func (r *Reader) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.ReaderWriter) +} + +func (r *Reader) afterLoad() {} + +// +checklocksignore +func (r *Reader) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.ReaderWriter) +} + +func (rw *ReaderWriter) StateTypeName() string { + return "pkg/sentry/kernel/pipe.ReaderWriter" +} + +func (rw *ReaderWriter) StateFields() []string { + return []string{ + "Pipe", + } +} + +func (rw *ReaderWriter) beforeSave() {} + +// +checklocksignore +func (rw *ReaderWriter) StateSave(stateSinkObject state.Sink) { + rw.beforeSave() + stateSinkObject.Save(0, &rw.Pipe) +} + +func (rw *ReaderWriter) afterLoad() {} + +// +checklocksignore +func (rw *ReaderWriter) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &rw.Pipe) +} + +func (vp *VFSPipe) StateTypeName() string { + return "pkg/sentry/kernel/pipe.VFSPipe" +} + +func (vp *VFSPipe) StateFields() []string { + return []string{ + "pipe", + } +} + +func (vp *VFSPipe) beforeSave() {} + +// +checklocksignore +func (vp *VFSPipe) StateSave(stateSinkObject state.Sink) { + vp.beforeSave() + stateSinkObject.Save(0, &vp.pipe) +} + +func (vp *VFSPipe) afterLoad() {} + +// +checklocksignore +func (vp *VFSPipe) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &vp.pipe) +} + +func (fd *VFSPipeFD) StateTypeName() string { + return "pkg/sentry/kernel/pipe.VFSPipeFD" +} + +func (fd *VFSPipeFD) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "LockFD", + "pipe", + } +} + +func (fd *VFSPipeFD) beforeSave() {} + +// +checklocksignore +func (fd *VFSPipeFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &fd.LockFD) + stateSinkObject.Save(4, &fd.pipe) +} + +func (fd *VFSPipeFD) afterLoad() {} + +// +checklocksignore +func (fd *VFSPipeFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &fd.LockFD) + stateSourceObject.Load(4, &fd.pipe) +} + +func (w *Writer) StateTypeName() string { + return "pkg/sentry/kernel/pipe.Writer" +} + +func (w *Writer) StateFields() []string { + return []string{ + "ReaderWriter", + } +} + +func (w *Writer) beforeSave() {} + +// +checklocksignore +func (w *Writer) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.ReaderWriter) +} + +func (w *Writer) afterLoad() {} + +// +checklocksignore +func (w *Writer) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.ReaderWriter) +} + +func init() { + state.Register((*inodeOperations)(nil)) + state.Register((*Pipe)(nil)) + state.Register((*Reader)(nil)) + state.Register((*ReaderWriter)(nil)) + state.Register((*VFSPipe)(nil)) + state.Register((*VFSPipeFD)(nil)) + state.Register((*Writer)(nil)) +} diff --git a/pkg/sentry/kernel/pipe/pipe_test.go b/pkg/sentry/kernel/pipe/pipe_test.go deleted file mode 100644 index 867f4a76b..000000000 --- a/pkg/sentry/kernel/pipe/pipe_test.go +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pipe - -import ( - "bytes" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestPipeRW(t *testing.T) { - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, 65536) - defer r.DecRef(ctx) - defer w.DecRef(ctx) - - msg := []byte("here's some bytes") - wantN := int64(len(msg)) - n, err := w.Writev(ctx, usermem.BytesIOSequence(msg)) - if n != wantN || err != nil { - t.Fatalf("Writev: got (%d, %v), wanted (%d, nil)", n, err, wantN) - } - - buf := make([]byte, len(msg)) - n, err = r.Readv(ctx, usermem.BytesIOSequence(buf)) - if n != wantN || err != nil || !bytes.Equal(buf, msg) { - t.Fatalf("Readv: got (%d, %v) %q, wanted (%d, nil) %q", n, err, buf, wantN, msg) - } -} - -func TestPipeReadBlock(t *testing.T) { - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, 65536) - defer r.DecRef(ctx) - defer w.DecRef(ctx) - - n, err := r.Readv(ctx, usermem.BytesIOSequence(make([]byte, 1))) - if n != 0 || err != syserror.ErrWouldBlock { - t.Fatalf("Readv: got (%d, %v), wanted (0, %v)", n, err, syserror.ErrWouldBlock) - } -} - -func TestPipeWriteBlock(t *testing.T) { - const atomicIOBytes = 2 - const capacity = MinimumPipeSize - - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, capacity) - defer r.DecRef(ctx) - defer w.DecRef(ctx) - - msg := make([]byte, capacity+1) - n, err := w.Writev(ctx, usermem.BytesIOSequence(msg)) - if wantN, wantErr := int64(capacity), syserror.ErrWouldBlock; n != wantN || err != wantErr { - t.Fatalf("Writev: got (%d, %v), wanted (%d, %v)", n, err, wantN, wantErr) - } -} - -func TestPipeWriteUntilEnd(t *testing.T) { - const atomicIOBytes = 2 - - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, atomicIOBytes) - defer r.DecRef(ctx) - defer w.DecRef(ctx) - - msg := []byte("here's some bytes") - - wDone := make(chan struct{}, 0) - rDone := make(chan struct{}, 0) - defer func() { - // Signal the reader to stop and wait until it does so. - close(wDone) - <-rDone - }() - - go func() { - defer close(rDone) - // Read from r until done is closed. - ctx := contexttest.Context(t) - buf := make([]byte, len(msg)+1) - dst := usermem.BytesIOSequence(buf) - e, ch := waiter.NewChannelEntry(nil) - r.EventRegister(&e, waiter.ReadableEvents) - defer r.EventUnregister(&e) - for { - n, err := r.Readv(ctx, dst) - dst = dst.DropFirst64(n) - if err == syserror.ErrWouldBlock { - select { - case <-ch: - continue - case <-wDone: - // We expect to have 1 byte left in dst since len(buf) == - // len(msg)+1. - if dst.NumBytes() != 1 || !bytes.Equal(buf[:len(msg)], msg) { - t.Errorf("Reader: got %q (%d bytes remaining), wanted %q", buf, dst.NumBytes(), msg) - } - return - } - } - if err != nil { - t.Errorf("Readv: got unexpected error %v", err) - return - } - } - }() - - src := usermem.BytesIOSequence(msg) - e, ch := waiter.NewChannelEntry(nil) - w.EventRegister(&e, waiter.WritableEvents) - defer w.EventUnregister(&e) - for src.NumBytes() != 0 { - n, err := w.Writev(ctx, src) - src = src.DropFirst64(n) - if err == syserror.ErrWouldBlock { - <-ch - continue - } - if err != nil { - t.Fatalf("Writev: got (%d, %v)", n, err) - } - } -} diff --git a/pkg/sentry/kernel/pipe/pipe_unsafe_state_autogen.go b/pkg/sentry/kernel/pipe/pipe_unsafe_state_autogen.go new file mode 100644 index 000000000..d3b40feb4 --- /dev/null +++ b/pkg/sentry/kernel/pipe/pipe_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package pipe diff --git a/pkg/sentry/kernel/process_group_list.go b/pkg/sentry/kernel/process_group_list.go new file mode 100644 index 000000000..9c493504d --- /dev/null +++ b/pkg/sentry/kernel/process_group_list.go @@ -0,0 +1,221 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type processGroupElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (processGroupElementMapper) linkerFor(elem *ProcessGroup) *ProcessGroup { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type processGroupList struct { + head *ProcessGroup + tail *ProcessGroup +} + +// Reset resets list l to the empty state. +func (l *processGroupList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *processGroupList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *processGroupList) Front() *ProcessGroup { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *processGroupList) Back() *ProcessGroup { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *processGroupList) Len() (count int) { + for e := l.Front(); e != nil; e = (processGroupElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *processGroupList) PushFront(e *ProcessGroup) { + linker := processGroupElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + processGroupElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *processGroupList) PushBack(e *ProcessGroup) { + linker := processGroupElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + processGroupElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *processGroupList) PushBackList(m *processGroupList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + processGroupElementMapper{}.linkerFor(l.tail).SetNext(m.head) + processGroupElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *processGroupList) InsertAfter(b, e *ProcessGroup) { + bLinker := processGroupElementMapper{}.linkerFor(b) + eLinker := processGroupElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + processGroupElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *processGroupList) InsertBefore(a, e *ProcessGroup) { + aLinker := processGroupElementMapper{}.linkerFor(a) + eLinker := processGroupElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + processGroupElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *processGroupList) Remove(e *ProcessGroup) { + linker := processGroupElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + processGroupElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + processGroupElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type processGroupEntry struct { + next *ProcessGroup + prev *ProcessGroup +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *processGroupEntry) Next() *ProcessGroup { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *processGroupEntry) Prev() *ProcessGroup { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *processGroupEntry) SetNext(elem *ProcessGroup) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *processGroupEntry) SetPrev(elem *ProcessGroup) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/process_group_refs.go b/pkg/sentry/kernel/process_group_refs.go new file mode 100644 index 000000000..cfd73315f --- /dev/null +++ b/pkg/sentry/kernel/process_group_refs.go @@ -0,0 +1,140 @@ +package kernel + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const ProcessGroupenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var ProcessGroupobj *ProcessGroup + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type ProcessGroupRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *ProcessGroupRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *ProcessGroupRefs) RefType() string { + return fmt.Sprintf("%T", ProcessGroupobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *ProcessGroupRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *ProcessGroupRefs) LogRefs() bool { + return ProcessGroupenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *ProcessGroupRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *ProcessGroupRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if ProcessGroupenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *ProcessGroupRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if ProcessGroupenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *ProcessGroupRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if ProcessGroupenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *ProcessGroupRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/sched/BUILD b/pkg/sentry/kernel/sched/BUILD deleted file mode 100644 index 1b82e087b..000000000 --- a/pkg/sentry/kernel/sched/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "sched", - srcs = [ - "cpuset.go", - "sched.go", - ], - visibility = ["//pkg/sentry:internal"], -) - -go_test( - name = "sched_test", - size = "small", - srcs = ["cpuset_test.go"], - library = ":sched", -) diff --git a/pkg/sentry/kernel/sched/cpuset_test.go b/pkg/sentry/kernel/sched/cpuset_test.go deleted file mode 100644 index 3af9f1197..000000000 --- a/pkg/sentry/kernel/sched/cpuset_test.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package sched - -import ( - "testing" -) - -func TestNumCPUs(t *testing.T) { - for i := uint(0); i < 1024; i++ { - c := NewCPUSet(i) - for j := uint(0); j < i; j++ { - c.Set(j) - } - n := c.NumCPUs() - if n != i { - t.Errorf("got wrong number of cpus %d, want %d", n, i) - } - } -} - -func TestClearAbove(t *testing.T) { - const n = 1024 - c := NewFullCPUSet(n) - for i := uint(0); i < n; i++ { - cpu := n - i - c.ClearAbove(cpu) - if got := c.NumCPUs(); got != cpu { - t.Errorf("iteration %d: got %d cpus, wanted %d", i, got, cpu) - } - } -} diff --git a/pkg/sentry/kernel/sched/sched_state_autogen.go b/pkg/sentry/kernel/sched/sched_state_autogen.go new file mode 100644 index 000000000..9705ca79d --- /dev/null +++ b/pkg/sentry/kernel/sched/sched_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package sched diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD deleted file mode 100644 index 2ae08ed12..000000000 --- a/pkg/sentry/kernel/semaphore/BUILD +++ /dev/null @@ -1,51 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "waiter_list", - out = "waiter_list.go", - package = "semaphore", - prefix = "waiter", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*waiter", - "Linker": "*waiter", - }, -) - -go_library( - name = "semaphore", - srcs = [ - "semaphore.go", - "waiter_list.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/ipc", - "//pkg/sentry/kernel/time", - "//pkg/sync", - "//pkg/syserror", - ], -) - -go_test( - name = "semaphore_test", - size = "small", - srcs = ["semaphore_test.go"], - library = ":semaphore", - deps = [ - "//pkg/abi/linux", # keep - "//pkg/context", # keep - "//pkg/sentry/contexttest", # keep - "//pkg/sentry/kernel/auth", # keep - "//pkg/sentry/kernel/ipc", # keep - "//pkg/syserror", # keep - ], -) diff --git a/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go b/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go new file mode 100644 index 000000000..7ea96b30d --- /dev/null +++ b/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go @@ -0,0 +1,202 @@ +// automatically generated by stateify. + +package semaphore + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *Registry) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.Registry" +} + +func (r *Registry) StateFields() []string { + return []string{ + "reg", + "indexes", + } +} + +func (r *Registry) beforeSave() {} + +// +checklocksignore +func (r *Registry) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.reg) + stateSinkObject.Save(1, &r.indexes) +} + +func (r *Registry) afterLoad() {} + +// +checklocksignore +func (r *Registry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.reg) + stateSourceObject.Load(1, &r.indexes) +} + +func (s *Set) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.Set" +} + +func (s *Set) StateFields() []string { + return []string{ + "registry", + "obj", + "opTime", + "changeTime", + "sems", + "dead", + } +} + +func (s *Set) beforeSave() {} + +// +checklocksignore +func (s *Set) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.registry) + stateSinkObject.Save(1, &s.obj) + stateSinkObject.Save(2, &s.opTime) + stateSinkObject.Save(3, &s.changeTime) + stateSinkObject.Save(4, &s.sems) + stateSinkObject.Save(5, &s.dead) +} + +func (s *Set) afterLoad() {} + +// +checklocksignore +func (s *Set) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.registry) + stateSourceObject.Load(1, &s.obj) + stateSourceObject.Load(2, &s.opTime) + stateSourceObject.Load(3, &s.changeTime) + stateSourceObject.Load(4, &s.sems) + stateSourceObject.Load(5, &s.dead) +} + +func (s *sem) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.sem" +} + +func (s *sem) StateFields() []string { + return []string{ + "value", + "pid", + } +} + +func (s *sem) beforeSave() {} + +// +checklocksignore +func (s *sem) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + if !state.IsZeroValue(&s.waiters) { + state.Failf("waiters is %#v, expected zero", &s.waiters) + } + stateSinkObject.Save(0, &s.value) + stateSinkObject.Save(1, &s.pid) +} + +func (s *sem) afterLoad() {} + +// +checklocksignore +func (s *sem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.value) + stateSourceObject.Load(1, &s.pid) +} + +func (w *waiter) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.waiter" +} + +func (w *waiter) StateFields() []string { + return []string{ + "waiterEntry", + "value", + "ch", + } +} + +func (w *waiter) beforeSave() {} + +// +checklocksignore +func (w *waiter) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.waiterEntry) + stateSinkObject.Save(1, &w.value) + stateSinkObject.Save(2, &w.ch) +} + +func (w *waiter) afterLoad() {} + +// +checklocksignore +func (w *waiter) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.waiterEntry) + stateSourceObject.Load(1, &w.value) + stateSourceObject.Load(2, &w.ch) +} + +func (l *waiterList) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.waiterList" +} + +func (l *waiterList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *waiterList) beforeSave() {} + +// +checklocksignore +func (l *waiterList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *waiterList) afterLoad() {} + +// +checklocksignore +func (l *waiterList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *waiterEntry) StateTypeName() string { + return "pkg/sentry/kernel/semaphore.waiterEntry" +} + +func (e *waiterEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *waiterEntry) beforeSave() {} + +// +checklocksignore +func (e *waiterEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *waiterEntry) afterLoad() {} + +// +checklocksignore +func (e *waiterEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func init() { + state.Register((*Registry)(nil)) + state.Register((*Set)(nil)) + state.Register((*sem)(nil)) + state.Register((*waiter)(nil)) + state.Register((*waiterList)(nil)) + state.Register((*waiterEntry)(nil)) +} diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go deleted file mode 100644 index 2e4ab8121..000000000 --- a/pkg/sentry/kernel/semaphore/semaphore_test.go +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package semaphore - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" - "gvisor.dev/gvisor/pkg/syserror" -) - -func executeOps(ctx context.Context, t *testing.T, set *Set, ops []linux.Sembuf, block bool) chan struct{} { - ch, _, err := set.executeOps(ctx, ops, 123) - if err != nil { - t.Fatalf("ExecuteOps(ops) failed, err: %v, ops: %+v", err, ops) - } - if block { - if ch == nil { - t.Fatalf("ExecuteOps(ops) got: nil, expected: !nil, ops: %+v", ops) - } - if signalled(ch) { - t.Fatalf("ExecuteOps(ops) channel should not have been signalled, ops: %+v", ops) - } - } else { - if ch != nil { - t.Fatalf("ExecuteOps(ops) got: %v, expected: nil, ops: %+v", ch, ops) - } - } - return ch -} - -func signalled(ch chan struct{}) bool { - select { - case <-ch: - return true - default: - return false - } -} - -func TestBasic(t *testing.T) { - ctx := contexttest.Context(t) - set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)} - ops := []linux.Sembuf{ - {SemOp: 1}, - } - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -1 - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -1 - ch1 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 1 - executeOps(ctx, t, set, ops, false) - if !signalled(ch1) { - t.Fatalf("ExecuteOps(ops) channel should not have been signalled, ops: %+v", ops) - } -} - -func TestWaitForZero(t *testing.T) { - ctx := contexttest.Context(t) - set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)} - ops := []linux.Sembuf{ - {SemOp: 0}, - } - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -2 - ch1 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 0 - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = 1 - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = 0 - chZero1 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 0 - chZero2 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 1 - executeOps(ctx, t, set, ops, false) - if !signalled(ch1) { - t.Fatalf("ExecuteOps(ops) channel should have been signalled, ops: %+v, set: %+v", ops, set) - } - - ops[0].SemOp = -2 - executeOps(ctx, t, set, ops, false) - if !signalled(chZero1) { - t.Fatalf("ExecuteOps(ops) channel zero 1 should have been signalled, ops: %+v, set: %+v", ops, set) - } - if !signalled(chZero2) { - t.Fatalf("ExecuteOps(ops) channel zero 2 should have been signalled, ops: %+v, set: %+v", ops, set) - } -} - -func TestNoWait(t *testing.T) { - ctx := contexttest.Context(t) - set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)} - ops := []linux.Sembuf{ - {SemOp: 1}, - } - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -2 - ops[0].SemFlg = linux.IPC_NOWAIT - if _, _, err := set.executeOps(ctx, ops, 123); err != syserror.ErrWouldBlock { - t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, syserror.ErrWouldBlock) - } - - ops[0].SemOp = 0 - ops[0].SemFlg = linux.IPC_NOWAIT - if _, _, err := set.executeOps(ctx, ops, 123); err != syserror.ErrWouldBlock { - t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, syserror.ErrWouldBlock) - } -} - -func TestUnregister(t *testing.T) { - ctx := contexttest.Context(t) - r := NewRegistry(auth.NewRootUserNamespace()) - set, err := r.FindOrCreate(ctx, 123, 2, linux.FileMode(0x600), true, true, true) - - if err != nil { - t.Fatalf("FindOrCreate() failed, err: %v", err) - } - if got := r.FindByID(set.obj.ID); got.obj.ID != set.obj.ID { - t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.obj.ID, got, set) - } - - ops := []linux.Sembuf{ - {SemOp: -1}, - } - chs := make([]chan struct{}, 0, 5) - for i := 0; i < 5; i++ { - ch := executeOps(ctx, t, set, ops, true) - chs = append(chs, ch) - } - - creds := auth.CredentialsFromContext(ctx) - if err := r.Remove(set.obj.ID, creds); err != nil { - t.Fatalf("Remove(%d) failed, err: %v", set.obj.ID, err) - } - if !set.dead { - t.Fatalf("set is not dead: %+v", set) - } - if got := r.FindByID(set.obj.ID); got != nil { - t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.obj.ID, got) - } - for i, ch := range chs { - if !signalled(ch) { - t.Fatalf("channel %d should have been signalled", i) - } - } -} diff --git a/pkg/sentry/kernel/semaphore/waiter_list.go b/pkg/sentry/kernel/semaphore/waiter_list.go new file mode 100644 index 000000000..51586d43f --- /dev/null +++ b/pkg/sentry/kernel/semaphore/waiter_list.go @@ -0,0 +1,221 @@ +package semaphore + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type waiterElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (waiterElementMapper) linkerFor(elem *waiter) *waiter { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type waiterList struct { + head *waiter + tail *waiter +} + +// Reset resets list l to the empty state. +func (l *waiterList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *waiterList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *waiterList) Front() *waiter { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *waiterList) Back() *waiter { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *waiterList) Len() (count int) { + for e := l.Front(); e != nil; e = (waiterElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *waiterList) PushFront(e *waiter) { + linker := waiterElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + waiterElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *waiterList) PushBack(e *waiter) { + linker := waiterElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *waiterList) PushBackList(m *waiterList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(m.head) + waiterElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *waiterList) InsertAfter(b, e *waiter) { + bLinker := waiterElementMapper{}.linkerFor(b) + eLinker := waiterElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + waiterElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *waiterList) InsertBefore(a, e *waiter) { + aLinker := waiterElementMapper{}.linkerFor(a) + eLinker := waiterElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + waiterElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *waiterList) Remove(e *waiter) { + linker := waiterElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + waiterElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + waiterElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type waiterEntry struct { + next *waiter + prev *waiter +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *waiterEntry) Next() *waiter { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *waiterEntry) Prev() *waiter { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *waiterEntry) SetNext(elem *waiter) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *waiterEntry) SetPrev(elem *waiter) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go new file mode 100644 index 000000000..d45cc0a0f --- /dev/null +++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go @@ -0,0 +1,38 @@ +package kernel + +import ( + "unsafe" + + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/sync" +) + +// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race +// with any writer critical sections in seq. +// +//go:nosplit +func SeqAtomicLoadTaskGoroutineSchedInfo(seq *sync.SeqCount, ptr *TaskGoroutineSchedInfo) TaskGoroutineSchedInfo { + for { + if val, ok := SeqAtomicTryLoadTaskGoroutineSchedInfo(seq, seq.BeginRead(), ptr); ok { + return val + } + } +} + +// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section +// in seq initiated by a call to seq.BeginRead() that returned epoch. If the +// read would race with a writer critical section, SeqAtomicTryLoad returns +// (unspecified, false). +// +//go:nosplit +func SeqAtomicTryLoadTaskGoroutineSchedInfo(seq *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *TaskGoroutineSchedInfo) (val TaskGoroutineSchedInfo, ok bool) { + if sync.RaceEnabled { + + gohacks.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + + val = *ptr + } + ok = seq.ReadOk(epoch) + return +} diff --git a/pkg/sentry/kernel/session_list.go b/pkg/sentry/kernel/session_list.go new file mode 100644 index 000000000..f53dea401 --- /dev/null +++ b/pkg/sentry/kernel/session_list.go @@ -0,0 +1,221 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type sessionElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (sessionElementMapper) linkerFor(elem *Session) *Session { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type sessionList struct { + head *Session + tail *Session +} + +// Reset resets list l to the empty state. +func (l *sessionList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *sessionList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *sessionList) Front() *Session { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *sessionList) Back() *Session { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *sessionList) Len() (count int) { + for e := l.Front(); e != nil; e = (sessionElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *sessionList) PushFront(e *Session) { + linker := sessionElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + sessionElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *sessionList) PushBack(e *Session) { + linker := sessionElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + sessionElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *sessionList) PushBackList(m *sessionList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + sessionElementMapper{}.linkerFor(l.tail).SetNext(m.head) + sessionElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *sessionList) InsertAfter(b, e *Session) { + bLinker := sessionElementMapper{}.linkerFor(b) + eLinker := sessionElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + sessionElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *sessionList) InsertBefore(a, e *Session) { + aLinker := sessionElementMapper{}.linkerFor(a) + eLinker := sessionElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + sessionElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *sessionList) Remove(e *Session) { + linker := sessionElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + sessionElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + sessionElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type sessionEntry struct { + next *Session + prev *Session +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *sessionEntry) Next() *Session { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *sessionEntry) Prev() *Session { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *sessionEntry) SetNext(elem *Session) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *sessionEntry) SetPrev(elem *Session) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/session_refs.go b/pkg/sentry/kernel/session_refs.go new file mode 100644 index 000000000..94d6380fa --- /dev/null +++ b/pkg/sentry/kernel/session_refs.go @@ -0,0 +1,140 @@ +package kernel + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const SessionenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var Sessionobj *Session + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type SessionRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *SessionRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *SessionRefs) RefType() string { + return fmt.Sprintf("%T", Sessionobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *SessionRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *SessionRefs) LogRefs() bool { + return SessionenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *SessionRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *SessionRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if SessionenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *SessionRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if SessionenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *SessionRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if SessionenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *SessionRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD deleted file mode 100644 index 4e8deac4c..000000000 --- a/pkg/sentry/kernel/shm/BUILD +++ /dev/null @@ -1,48 +0,0 @@ -load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "shm_refs", - out = "shm_refs.go", - consts = { - "enableLogging": "true", - }, - package = "shm", - prefix = "Shm", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "Shm", - }, -) - -go_library( - name = "shm", - srcs = [ - "device.go", - "shm.go", - "shm_refs.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/ipc", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/usage", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/kernel/shm/shm_refs.go b/pkg/sentry/kernel/shm/shm_refs.go new file mode 100644 index 000000000..e6eed69ef --- /dev/null +++ b/pkg/sentry/kernel/shm/shm_refs.go @@ -0,0 +1,140 @@ +package shm + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const ShmenableLogging = true + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var Shmobj *Shm + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type ShmRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *ShmRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *ShmRefs) RefType() string { + return fmt.Sprintf("%T", Shmobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *ShmRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *ShmRefs) LogRefs() bool { + return ShmenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *ShmRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *ShmRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if ShmenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *ShmRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if ShmenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *ShmRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if ShmenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *ShmRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/kernel/shm/shm_state_autogen.go b/pkg/sentry/kernel/shm/shm_state_autogen.go new file mode 100644 index 000000000..9dde9122d --- /dev/null +++ b/pkg/sentry/kernel/shm/shm_state_autogen.go @@ -0,0 +1,129 @@ +// automatically generated by stateify. + +package shm + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *Registry) StateTypeName() string { + return "pkg/sentry/kernel/shm.Registry" +} + +func (r *Registry) StateFields() []string { + return []string{ + "userNS", + "reg", + "totalPages", + } +} + +func (r *Registry) beforeSave() {} + +// +checklocksignore +func (r *Registry) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.userNS) + stateSinkObject.Save(1, &r.reg) + stateSinkObject.Save(2, &r.totalPages) +} + +func (r *Registry) afterLoad() {} + +// +checklocksignore +func (r *Registry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.userNS) + stateSourceObject.Load(1, &r.reg) + stateSourceObject.Load(2, &r.totalPages) +} + +func (s *Shm) StateTypeName() string { + return "pkg/sentry/kernel/shm.Shm" +} + +func (s *Shm) StateFields() []string { + return []string{ + "ShmRefs", + "mfp", + "registry", + "size", + "effectiveSize", + "fr", + "obj", + "attachTime", + "detachTime", + "changeTime", + "creatorPID", + "lastAttachDetachPID", + "pendingDestruction", + } +} + +func (s *Shm) beforeSave() {} + +// +checklocksignore +func (s *Shm) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.ShmRefs) + stateSinkObject.Save(1, &s.mfp) + stateSinkObject.Save(2, &s.registry) + stateSinkObject.Save(3, &s.size) + stateSinkObject.Save(4, &s.effectiveSize) + stateSinkObject.Save(5, &s.fr) + stateSinkObject.Save(6, &s.obj) + stateSinkObject.Save(7, &s.attachTime) + stateSinkObject.Save(8, &s.detachTime) + stateSinkObject.Save(9, &s.changeTime) + stateSinkObject.Save(10, &s.creatorPID) + stateSinkObject.Save(11, &s.lastAttachDetachPID) + stateSinkObject.Save(12, &s.pendingDestruction) +} + +func (s *Shm) afterLoad() {} + +// +checklocksignore +func (s *Shm) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.ShmRefs) + stateSourceObject.Load(1, &s.mfp) + stateSourceObject.Load(2, &s.registry) + stateSourceObject.Load(3, &s.size) + stateSourceObject.Load(4, &s.effectiveSize) + stateSourceObject.Load(5, &s.fr) + stateSourceObject.Load(6, &s.obj) + stateSourceObject.Load(7, &s.attachTime) + stateSourceObject.Load(8, &s.detachTime) + stateSourceObject.Load(9, &s.changeTime) + stateSourceObject.Load(10, &s.creatorPID) + stateSourceObject.Load(11, &s.lastAttachDetachPID) + stateSourceObject.Load(12, &s.pendingDestruction) +} + +func (r *ShmRefs) StateTypeName() string { + return "pkg/sentry/kernel/shm.ShmRefs" +} + +func (r *ShmRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *ShmRefs) beforeSave() {} + +// +checklocksignore +func (r *ShmRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *ShmRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func init() { + state.Register((*Registry)(nil)) + state.Register((*Shm)(nil)) + state.Register((*ShmRefs)(nil)) +} diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD deleted file mode 100644 index 1110ecca5..000000000 --- a/pkg/sentry/kernel/signalfd/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "signalfd", - srcs = ["signalfd.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/signalfd/signalfd_state_autogen.go b/pkg/sentry/kernel/signalfd/signalfd_state_autogen.go new file mode 100644 index 000000000..ad2622f27 --- /dev/null +++ b/pkg/sentry/kernel/signalfd/signalfd_state_autogen.go @@ -0,0 +1,39 @@ +// automatically generated by stateify. + +package signalfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *SignalOperations) StateTypeName() string { + return "pkg/sentry/kernel/signalfd.SignalOperations" +} + +func (s *SignalOperations) StateFields() []string { + return []string{ + "target", + "mask", + } +} + +func (s *SignalOperations) beforeSave() {} + +// +checklocksignore +func (s *SignalOperations) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.target) + stateSinkObject.Save(1, &s.mask) +} + +func (s *SignalOperations) afterLoad() {} + +// +checklocksignore +func (s *SignalOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.target) + stateSourceObject.Load(1, &s.mask) +} + +func init() { + state.Register((*SignalOperations)(nil)) +} diff --git a/pkg/sentry/kernel/socket_list.go b/pkg/sentry/kernel/socket_list.go new file mode 100644 index 000000000..2b2f5055e --- /dev/null +++ b/pkg/sentry/kernel/socket_list.go @@ -0,0 +1,221 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type socketElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (socketElementMapper) linkerFor(elem *SocketRecordVFS1) *SocketRecordVFS1 { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type socketList struct { + head *SocketRecordVFS1 + tail *SocketRecordVFS1 +} + +// Reset resets list l to the empty state. +func (l *socketList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *socketList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *socketList) Front() *SocketRecordVFS1 { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *socketList) Back() *SocketRecordVFS1 { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *socketList) Len() (count int) { + for e := l.Front(); e != nil; e = (socketElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *socketList) PushFront(e *SocketRecordVFS1) { + linker := socketElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + socketElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *socketList) PushBack(e *SocketRecordVFS1) { + linker := socketElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + socketElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *socketList) PushBackList(m *socketList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + socketElementMapper{}.linkerFor(l.tail).SetNext(m.head) + socketElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *socketList) InsertAfter(b, e *SocketRecordVFS1) { + bLinker := socketElementMapper{}.linkerFor(b) + eLinker := socketElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + socketElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *socketList) InsertBefore(a, e *SocketRecordVFS1) { + aLinker := socketElementMapper{}.linkerFor(a) + eLinker := socketElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + socketElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *socketList) Remove(e *SocketRecordVFS1) { + linker := socketElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + socketElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + socketElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type socketEntry struct { + next *SocketRecordVFS1 + prev *SocketRecordVFS1 +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *socketEntry) Next() *SocketRecordVFS1 { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *socketEntry) Prev() *SocketRecordVFS1 { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *socketEntry) SetNext(elem *SocketRecordVFS1) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *socketEntry) SetPrev(elem *SocketRecordVFS1) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/table_test.go b/pkg/sentry/kernel/table_test.go deleted file mode 100644 index 32cf47e05..000000000 --- a/pkg/sentry/kernel/table_test.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi" - "gvisor.dev/gvisor/pkg/sentry/arch" -) - -const ( - maxTestSyscall = 1000 -) - -func createSyscallTable() *SyscallTable { - m := make(map[uintptr]Syscall) - for i := uintptr(0); i <= maxTestSyscall; i++ { - j := i - m[i] = Syscall{ - Fn: func(*Task, arch.SyscallArguments) (uintptr, *SyscallControl, error) { - return j, nil, nil - }, - } - } - - s := &SyscallTable{ - OS: abi.Linux, - Arch: arch.AMD64, - Table: m, - } - - RegisterSyscallTable(s) - return s -} - -func TestTable(t *testing.T) { - table := createSyscallTable() - defer func() { - // Cleanup registered tables to keep tests separate. - allSyscallTables = []*SyscallTable{} - }() - - // Go through all functions and check that they return the right value. - for i := uintptr(0); i < maxTestSyscall; i++ { - fn := table.Lookup(i) - if fn == nil { - t.Errorf("Syscall %v is set to nil", i) - continue - } - - v, _, _ := fn(nil, arch.SyscallArguments{}) - if v != i { - t.Errorf("Wrong return value for syscall %v: expected %v, got %v", i, i, v) - } - } - - // Check that values outside the range return nil. - for i := uintptr(maxTestSyscall + 1); i < maxTestSyscall+100; i++ { - fn := table.Lookup(i) - if fn != nil { - t.Errorf("Syscall %v is not nil: %v", i, fn) - continue - } - } -} - -func BenchmarkTableLookup(b *testing.B) { - table := createSyscallTable() - - b.ResetTimer() - - j := uintptr(0) - for i := 0; i < b.N; i++ { - table.Lookup(j) - j = (j + 1) % 310 - } - - b.StopTimer() - // Cleanup registered tables to keep tests separate. - allSyscallTables = []*SyscallTable{} -} - -func BenchmarkTableMapLookup(b *testing.B) { - table := createSyscallTable() - - b.ResetTimer() - - j := uintptr(0) - for i := 0; i < b.N; i++ { - table.mapLookup(j) - j = (j + 1) % 310 - } - - b.StopTimer() - // Cleanup registered tables to keep tests separate. - allSyscallTables = []*SyscallTable{} -} diff --git a/pkg/sentry/kernel/task_list.go b/pkg/sentry/kernel/task_list.go new file mode 100644 index 000000000..66df9ac45 --- /dev/null +++ b/pkg/sentry/kernel/task_list.go @@ -0,0 +1,221 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type taskElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (taskElementMapper) linkerFor(elem *Task) *Task { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type taskList struct { + head *Task + tail *Task +} + +// Reset resets list l to the empty state. +func (l *taskList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *taskList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *taskList) Front() *Task { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *taskList) Back() *Task { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *taskList) Len() (count int) { + for e := l.Front(); e != nil; e = (taskElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *taskList) PushFront(e *Task) { + linker := taskElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + taskElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *taskList) PushBack(e *Task) { + linker := taskElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + taskElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *taskList) PushBackList(m *taskList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + taskElementMapper{}.linkerFor(l.tail).SetNext(m.head) + taskElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *taskList) InsertAfter(b, e *Task) { + bLinker := taskElementMapper{}.linkerFor(b) + eLinker := taskElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + taskElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *taskList) InsertBefore(a, e *Task) { + aLinker := taskElementMapper{}.linkerFor(a) + eLinker := taskElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + taskElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *taskList) Remove(e *Task) { + linker := taskElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + taskElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + taskElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type taskEntry struct { + next *Task + prev *Task +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *taskEntry) Next() *Task { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *taskEntry) Prev() *Task { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *taskEntry) SetNext(elem *Task) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *taskEntry) SetPrev(elem *Task) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/task_test.go b/pkg/sentry/kernel/task_test.go deleted file mode 100644 index cfcde9a7a..000000000 --- a/pkg/sentry/kernel/task_test.go +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/kernel/sched" -) - -func TestTaskCPU(t *testing.T) { - for _, test := range []struct { - mask sched.CPUSet - tid ThreadID - cpu int32 - }{ - { - mask: []byte{0xff}, - tid: 1, - cpu: 0, - }, - { - mask: []byte{0xff}, - tid: 10, - cpu: 1, - }, - { - // more than 8 cpus. - mask: []byte{0xff, 0xff}, - tid: 10, - cpu: 9, - }, - { - // missing the first cpu. - mask: []byte{0xfe}, - tid: 1, - cpu: 1, - }, - { - mask: []byte{0xfe}, - tid: 10, - cpu: 3, - }, - { - // missing the fifth cpu. - mask: []byte{0xef}, - tid: 10, - cpu: 2, - }, - } { - assigned := assignCPU(test.mask, test.tid) - if test.cpu != assigned { - t.Errorf("assignCPU(%v, %v) got %v, want %v", test.mask, test.tid, assigned, test.cpu) - } - } - -} diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD deleted file mode 100644 index e293d9a0f..000000000 --- a/pkg/sentry/kernel/time/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "time", - srcs = [ - "context.go", - "tcpip.go", - "time.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/sync", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/time/time_state_autogen.go b/pkg/sentry/kernel/time/time_state_autogen.go new file mode 100644 index 000000000..855751953 --- /dev/null +++ b/pkg/sentry/kernel/time/time_state_autogen.go @@ -0,0 +1,103 @@ +// automatically generated by stateify. + +package time + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (t *Time) StateTypeName() string { + return "pkg/sentry/kernel/time.Time" +} + +func (t *Time) StateFields() []string { + return []string{ + "ns", + } +} + +func (t *Time) beforeSave() {} + +// +checklocksignore +func (t *Time) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.ns) +} + +func (t *Time) afterLoad() {} + +// +checklocksignore +func (t *Time) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.ns) +} + +func (s *Setting) StateTypeName() string { + return "pkg/sentry/kernel/time.Setting" +} + +func (s *Setting) StateFields() []string { + return []string{ + "Enabled", + "Next", + "Period", + } +} + +func (s *Setting) beforeSave() {} + +// +checklocksignore +func (s *Setting) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Enabled) + stateSinkObject.Save(1, &s.Next) + stateSinkObject.Save(2, &s.Period) +} + +func (s *Setting) afterLoad() {} + +// +checklocksignore +func (s *Setting) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Enabled) + stateSourceObject.Load(1, &s.Next) + stateSourceObject.Load(2, &s.Period) +} + +func (t *Timer) StateTypeName() string { + return "pkg/sentry/kernel/time.Timer" +} + +func (t *Timer) StateFields() []string { + return []string{ + "clock", + "listener", + "setting", + "paused", + } +} + +func (t *Timer) beforeSave() {} + +// +checklocksignore +func (t *Timer) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.clock) + stateSinkObject.Save(1, &t.listener) + stateSinkObject.Save(2, &t.setting) + stateSinkObject.Save(3, &t.paused) +} + +func (t *Timer) afterLoad() {} + +// +checklocksignore +func (t *Timer) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.clock) + stateSourceObject.Load(1, &t.listener) + stateSourceObject.Load(2, &t.setting) + stateSourceObject.Load(3, &t.paused) +} + +func init() { + state.Register((*Time)(nil)) + state.Register((*Setting)(nil)) + state.Register((*Timer)(nil)) +} diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go deleted file mode 100644 index b6039505a..000000000 --- a/pkg/sentry/kernel/timekeeper_test.go +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - sentrytime "gvisor.dev/gvisor/pkg/sentry/time" - "gvisor.dev/gvisor/pkg/sentry/usage" -) - -// mockClocks is a sentrytime.Clocks that simply returns the times in the -// struct. -type mockClocks struct { - monotonic int64 - realtime int64 -} - -// Update implements sentrytime.Clocks.Update. It does nothing. -func (*mockClocks) Update() (monotonicParams sentrytime.Parameters, monotonicOk bool, realtimeParam sentrytime.Parameters, realtimeOk bool) { - return -} - -// Update implements sentrytime.Clocks.GetTime. -func (c *mockClocks) GetTime(id sentrytime.ClockID) (int64, error) { - switch id { - case sentrytime.Monotonic: - return c.monotonic, nil - case sentrytime.Realtime: - return c.realtime, nil - default: - return 0, linuxerr.EINVAL - } -} - -// stateTestClocklessTimekeeper returns a test Timekeeper which has not had -// SetClocks called. -func stateTestClocklessTimekeeper(tb testing.TB) *Timekeeper { - ctx := contexttest.Context(tb) - mfp := pgalloc.MemoryFileProviderFromContext(ctx) - fr, err := mfp.MemoryFile().Allocate(hostarch.PageSize, usage.Anonymous) - if err != nil { - tb.Fatalf("failed to allocate memory: %v", err) - } - return &Timekeeper{ - params: NewVDSOParamPage(mfp, fr), - } -} - -func stateTestTimekeeper(tb testing.TB) *Timekeeper { - t := stateTestClocklessTimekeeper(tb) - t.SetClocks(sentrytime.NewCalibratedClocks()) - return t -} - -// TestTimekeeperMonotonicZero tests that monotonic time starts at zero. -func TestTimekeeperMonotonicZero(t *testing.T) { - c := &mockClocks{ - monotonic: 100000, - } - - tk := stateTestClocklessTimekeeper(t) - tk.SetClocks(c) - defer tk.Destroy() - - now, err := tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 0 { - t.Errorf("GetTime got %d want 0", now) - } - - c.monotonic += 10 - - now, err = tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 10 { - t.Errorf("GetTime got %d want 10", now) - } -} - -// TestTimekeeperMonotonicJumpForward tests that monotonic time jumps forward -// after restore. -func TestTimekeeperMonotonicForward(t *testing.T) { - c := &mockClocks{ - monotonic: 900000, - realtime: 600000, - } - - tk := stateTestClocklessTimekeeper(t) - tk.restored = make(chan struct{}) - tk.saveMonotonic = 100000 - tk.saveRealtime = 400000 - tk.SetClocks(c) - defer tk.Destroy() - - // The monotonic clock should jump ahead by 200000 to 300000. - // - // The new system monotonic time (900000) is irrelevant to what the app - // sees. - now, err := tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 300000 { - t.Errorf("GetTime got %d want 300000", now) - } -} - -// TestTimekeeperMonotonicJumpBackwards tests that monotonic time does not jump -// backwards when realtime goes backwards. -func TestTimekeeperMonotonicJumpBackwards(t *testing.T) { - c := &mockClocks{ - monotonic: 900000, - realtime: 400000, - } - - tk := stateTestClocklessTimekeeper(t) - tk.restored = make(chan struct{}) - tk.saveMonotonic = 100000 - tk.saveRealtime = 600000 - tk.SetClocks(c) - defer tk.Destroy() - - // The monotonic clock should remain at 100000. - // - // The new system monotonic time (900000) is irrelevant to what the app - // sees and we don't want to jump the monotonic clock backwards like - // realtime did. - now, err := tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 100000 { - t.Errorf("GetTime got %d want 100000", now) - } -} diff --git a/pkg/sentry/kernel/uncaught_signal.proto b/pkg/sentry/kernel/uncaught_signal.proto deleted file mode 100644 index 0bdb062cb..000000000 --- a/pkg/sentry/kernel/uncaught_signal.proto +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -import "pkg/sentry/arch/registers.proto"; - -message UncaughtSignal { - // Thread ID. - int32 tid = 1; - - // Process ID. - int32 pid = 2; - - // Registers at the time of the fault or signal. - Registers registers = 3; - - // Signal number. - int32 signal_number = 4; - - // The memory location which caused the fault (set if applicable, 0 - // otherwise). This will be set for SIGILL, SIGFPE, SIGSEGV, and SIGBUS. - uint64 fault_addr = 5; -} diff --git a/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go b/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go new file mode 100644 index 000000000..54955f061 --- /dev/null +++ b/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go @@ -0,0 +1,193 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.25.0 +// protoc v3.13.0 +// source: pkg/sentry/kernel/uncaught_signal.proto + +package gvisor + +import ( + proto "github.com/golang/protobuf/proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + registers_go_proto "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion that a sufficiently up-to-date version +// of the legacy proto package is being used. +const _ = proto.ProtoPackageIsVersion4 + +type UncaughtSignal struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Tid int32 `protobuf:"varint,1,opt,name=tid,proto3" json:"tid,omitempty"` + Pid int32 `protobuf:"varint,2,opt,name=pid,proto3" json:"pid,omitempty"` + Registers *registers_go_proto.Registers `protobuf:"bytes,3,opt,name=registers,proto3" json:"registers,omitempty"` + SignalNumber int32 `protobuf:"varint,4,opt,name=signal_number,json=signalNumber,proto3" json:"signal_number,omitempty"` + FaultAddr uint64 `protobuf:"varint,5,opt,name=fault_addr,json=faultAddr,proto3" json:"fault_addr,omitempty"` +} + +func (x *UncaughtSignal) Reset() { + *x = UncaughtSignal{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_kernel_uncaught_signal_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *UncaughtSignal) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UncaughtSignal) ProtoMessage() {} + +func (x *UncaughtSignal) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_kernel_uncaught_signal_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UncaughtSignal.ProtoReflect.Descriptor instead. +func (*UncaughtSignal) Descriptor() ([]byte, []int) { + return file_pkg_sentry_kernel_uncaught_signal_proto_rawDescGZIP(), []int{0} +} + +func (x *UncaughtSignal) GetTid() int32 { + if x != nil { + return x.Tid + } + return 0 +} + +func (x *UncaughtSignal) GetPid() int32 { + if x != nil { + return x.Pid + } + return 0 +} + +func (x *UncaughtSignal) GetRegisters() *registers_go_proto.Registers { + if x != nil { + return x.Registers + } + return nil +} + +func (x *UncaughtSignal) GetSignalNumber() int32 { + if x != nil { + return x.SignalNumber + } + return 0 +} + +func (x *UncaughtSignal) GetFaultAddr() uint64 { + if x != nil { + return x.FaultAddr + } + return 0 +} + +var File_pkg_sentry_kernel_uncaught_signal_proto protoreflect.FileDescriptor + +var file_pkg_sentry_kernel_uncaught_signal_proto_rawDesc = []byte{ + 0x0a, 0x27, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x2f, 0x6b, 0x65, 0x72, + 0x6e, 0x65, 0x6c, 0x2f, 0x75, 0x6e, 0x63, 0x61, 0x75, 0x67, 0x68, 0x74, 0x5f, 0x73, 0x69, 0x67, + 0x6e, 0x61, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x06, 0x67, 0x76, 0x69, 0x73, 0x6f, + 0x72, 0x1a, 0x1f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x2f, 0x61, 0x72, + 0x63, 0x68, 0x2f, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x22, 0xa9, 0x01, 0x0a, 0x0e, 0x55, 0x6e, 0x63, 0x61, 0x75, 0x67, 0x68, 0x74, 0x53, + 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x74, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x03, 0x74, 0x69, 0x64, 0x12, 0x10, 0x0a, 0x03, 0x70, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x70, 0x69, 0x64, 0x12, 0x2f, 0x0a, 0x09, 0x72, 0x65, 0x67, + 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x67, + 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x52, + 0x09, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x73, 0x69, + 0x67, 0x6e, 0x61, 0x6c, 0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x0c, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, + 0x1d, 0x0a, 0x0a, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x61, 0x64, 0x64, 0x72, 0x18, 0x05, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x09, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x41, 0x64, 0x64, 0x72, 0x62, 0x06, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_pkg_sentry_kernel_uncaught_signal_proto_rawDescOnce sync.Once + file_pkg_sentry_kernel_uncaught_signal_proto_rawDescData = file_pkg_sentry_kernel_uncaught_signal_proto_rawDesc +) + +func file_pkg_sentry_kernel_uncaught_signal_proto_rawDescGZIP() []byte { + file_pkg_sentry_kernel_uncaught_signal_proto_rawDescOnce.Do(func() { + file_pkg_sentry_kernel_uncaught_signal_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_sentry_kernel_uncaught_signal_proto_rawDescData) + }) + return file_pkg_sentry_kernel_uncaught_signal_proto_rawDescData +} + +var file_pkg_sentry_kernel_uncaught_signal_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_pkg_sentry_kernel_uncaught_signal_proto_goTypes = []interface{}{ + (*UncaughtSignal)(nil), // 0: gvisor.UncaughtSignal + (*registers_go_proto.Registers)(nil), // 1: gvisor.Registers +} +var file_pkg_sentry_kernel_uncaught_signal_proto_depIdxs = []int32{ + 1, // 0: gvisor.UncaughtSignal.registers:type_name -> gvisor.Registers + 1, // [1:1] is the sub-list for method output_type + 1, // [1:1] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_pkg_sentry_kernel_uncaught_signal_proto_init() } +func file_pkg_sentry_kernel_uncaught_signal_proto_init() { + if File_pkg_sentry_kernel_uncaught_signal_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_pkg_sentry_kernel_uncaught_signal_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*UncaughtSignal); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pkg_sentry_kernel_uncaught_signal_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_pkg_sentry_kernel_uncaught_signal_proto_goTypes, + DependencyIndexes: file_pkg_sentry_kernel_uncaught_signal_proto_depIdxs, + MessageInfos: file_pkg_sentry_kernel_uncaught_signal_proto_msgTypes, + }.Build() + File_pkg_sentry_kernel_uncaught_signal_proto = out.File + file_pkg_sentry_kernel_uncaught_signal_proto_rawDesc = nil + file_pkg_sentry_kernel_uncaught_signal_proto_goTypes = nil + file_pkg_sentry_kernel_uncaught_signal_proto_depIdxs = nil +} diff --git a/pkg/sentry/limits/BUILD b/pkg/sentry/limits/BUILD deleted file mode 100644 index 21b0d1595..000000000 --- a/pkg/sentry/limits/BUILD +++ /dev/null @@ -1,29 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "limits", - srcs = [ - "context.go", - "limits.go", - "linux.go", - ], - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sync", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "limits_test", - size = "small", - srcs = [ - "limits_test.go", - ], - library = ":limits", - deps = ["@org_golang_x_sys//unix:go_default_library"], -) diff --git a/pkg/sentry/limits/limits_state_autogen.go b/pkg/sentry/limits/limits_state_autogen.go new file mode 100644 index 000000000..038124f76 --- /dev/null +++ b/pkg/sentry/limits/limits_state_autogen.go @@ -0,0 +1,65 @@ +// automatically generated by stateify. + +package limits + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (l *Limit) StateTypeName() string { + return "pkg/sentry/limits.Limit" +} + +func (l *Limit) StateFields() []string { + return []string{ + "Cur", + "Max", + } +} + +func (l *Limit) beforeSave() {} + +// +checklocksignore +func (l *Limit) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.Cur) + stateSinkObject.Save(1, &l.Max) +} + +func (l *Limit) afterLoad() {} + +// +checklocksignore +func (l *Limit) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.Cur) + stateSourceObject.Load(1, &l.Max) +} + +func (l *LimitSet) StateTypeName() string { + return "pkg/sentry/limits.LimitSet" +} + +func (l *LimitSet) StateFields() []string { + return []string{ + "data", + } +} + +func (l *LimitSet) beforeSave() {} + +// +checklocksignore +func (l *LimitSet) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.data) +} + +func (l *LimitSet) afterLoad() {} + +// +checklocksignore +func (l *LimitSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.data) +} + +func init() { + state.Register((*Limit)(nil)) + state.Register((*LimitSet)(nil)) +} diff --git a/pkg/sentry/limits/limits_test.go b/pkg/sentry/limits/limits_test.go deleted file mode 100644 index 0ee877be4..000000000 --- a/pkg/sentry/limits/limits_test.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package limits - -import ( - "testing" - - "golang.org/x/sys/unix" -) - -func TestSet(t *testing.T) { - testCases := []struct { - limit Limit - privileged bool - expectedErr error - }{ - {limit: Limit{Cur: 50, Max: 50}, privileged: false, expectedErr: nil}, - {limit: Limit{Cur: 20, Max: 50}, privileged: false, expectedErr: nil}, - {limit: Limit{Cur: 20, Max: 60}, privileged: false, expectedErr: unix.EPERM}, - {limit: Limit{Cur: 60, Max: 50}, privileged: false, expectedErr: unix.EINVAL}, - {limit: Limit{Cur: 11, Max: 10}, privileged: false, expectedErr: unix.EINVAL}, - {limit: Limit{Cur: 20, Max: 60}, privileged: true, expectedErr: nil}, - } - - ls := NewLimitSet() - for _, tc := range testCases { - if _, err := ls.Set(1, tc.limit, tc.privileged); err != tc.expectedErr { - t.Fatalf("Tried to set Limit to %+v and privilege %t: got %v, wanted %v", tc.limit, tc.privileged, err, tc.expectedErr) - } - } - -} diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD deleted file mode 100644 index 54bfed644..000000000 --- a/pkg/sentry/loader/BUILD +++ /dev/null @@ -1,43 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "loader", - srcs = [ - "elf.go", - "interpreter.go", - "loader.go", - "vdso.go", - "vdso_state.go", - ], - marshal = True, - marshal_debug = True, - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi", - "//pkg/abi/linux", - "//pkg/abi/linux/errno", - "//pkg/context", - "//pkg/cpuid", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/rand", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/fsbridge", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/limits", - "//pkg/sentry/loader/vdsodata", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/pgalloc", - "//pkg/sentry/uniqueid", - "//pkg/sentry/usage", - "//pkg/sentry/vfs", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/loader/loader_abi_autogen_unsafe.go b/pkg/sentry/loader/loader_abi_autogen_unsafe.go new file mode 100644 index 000000000..618cb60fc --- /dev/null +++ b/pkg/sentry/loader/loader_abi_autogen_unsafe.go @@ -0,0 +1,7 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package loader + +import ( +) + diff --git a/pkg/sentry/loader/loader_state_autogen.go b/pkg/sentry/loader/loader_state_autogen.go new file mode 100644 index 000000000..7d001c54b --- /dev/null +++ b/pkg/sentry/loader/loader_state_autogen.go @@ -0,0 +1,96 @@ +// automatically generated by stateify. + +package loader + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (v *VDSO) StateTypeName() string { + return "pkg/sentry/loader.VDSO" +} + +func (v *VDSO) StateFields() []string { + return []string{ + "ParamPage", + "vdso", + "os", + "arch", + "phdrs", + } +} + +func (v *VDSO) beforeSave() {} + +// +checklocksignore +func (v *VDSO) StateSave(stateSinkObject state.Sink) { + v.beforeSave() + var phdrsValue []elfProgHeader = v.savePhdrs() + stateSinkObject.SaveValue(4, phdrsValue) + stateSinkObject.Save(0, &v.ParamPage) + stateSinkObject.Save(1, &v.vdso) + stateSinkObject.Save(2, &v.os) + stateSinkObject.Save(3, &v.arch) +} + +func (v *VDSO) afterLoad() {} + +// +checklocksignore +func (v *VDSO) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &v.ParamPage) + stateSourceObject.Load(1, &v.vdso) + stateSourceObject.Load(2, &v.os) + stateSourceObject.Load(3, &v.arch) + stateSourceObject.LoadValue(4, new([]elfProgHeader), func(y interface{}) { v.loadPhdrs(y.([]elfProgHeader)) }) +} + +func (e *elfProgHeader) StateTypeName() string { + return "pkg/sentry/loader.elfProgHeader" +} + +func (e *elfProgHeader) StateFields() []string { + return []string{ + "Type", + "Flags", + "Off", + "Vaddr", + "Paddr", + "Filesz", + "Memsz", + "Align", + } +} + +func (e *elfProgHeader) beforeSave() {} + +// +checklocksignore +func (e *elfProgHeader) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.Type) + stateSinkObject.Save(1, &e.Flags) + stateSinkObject.Save(2, &e.Off) + stateSinkObject.Save(3, &e.Vaddr) + stateSinkObject.Save(4, &e.Paddr) + stateSinkObject.Save(5, &e.Filesz) + stateSinkObject.Save(6, &e.Memsz) + stateSinkObject.Save(7, &e.Align) +} + +func (e *elfProgHeader) afterLoad() {} + +// +checklocksignore +func (e *elfProgHeader) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.Type) + stateSourceObject.Load(1, &e.Flags) + stateSourceObject.Load(2, &e.Off) + stateSourceObject.Load(3, &e.Vaddr) + stateSourceObject.Load(4, &e.Paddr) + stateSourceObject.Load(5, &e.Filesz) + stateSourceObject.Load(6, &e.Memsz) + stateSourceObject.Load(7, &e.Align) +} + +func init() { + state.Register((*VDSO)(nil)) + state.Register((*elfProgHeader)(nil)) +} diff --git a/pkg/sentry/loader/vdsodata/BUILD b/pkg/sentry/loader/vdsodata/BUILD deleted file mode 100644 index 119199f97..000000000 --- a/pkg/sentry/loader/vdsodata/BUILD +++ /dev/null @@ -1,38 +0,0 @@ -load("//tools:defs.bzl", "go_add_tags", "go_embed_data", "go_library") - -package(licenses = ["notice"]) - -go_embed_data( - name = "vdso_bin", - src = "//vdso:vdso.so", - package = "vdsodata", - var = "Binary", -) - -[ - # Generate multiple tagged files. Note that the contents of all files - # will be the same (i.e. vdso_arm64.go will contain the amd64 vdso), but - # the build tags will ensure only one is selected. When we generate the - # "Go" branch, we select all archiecture files from the relevant build. - # This is a hack around some limitations for "out" being a configurable - # attribute and selects for srcs. See also tools/go_branch.sh. - go_add_tags( - name = "vdso_%s" % arch, - src = ":vdso_bin", - out = "vdso_%s.go" % arch, - go_tags = [arch], - ) - for arch in ("amd64", "arm64") -] - -go_library( - name = "vdsodata", - srcs = [ - "vdsodata.go", - ":vdso_amd64", - ":vdso_arm64", - ], - marshal = False, - stateify = False, - visibility = ["//pkg/sentry:internal"], -) diff --git a/pkg/sentry/loader/vdsodata/vdso_amd64.go b/pkg/sentry/loader/vdsodata/vdso_amd64.go new file mode 100644 index 000000000..ff53489f8 --- /dev/null +++ b/pkg/sentry/loader/vdsodata/vdso_amd64.go @@ -0,0 +1,7 @@ +// +build amd64 + +// Generated by go_embed_data for //pkg/sentry/loader/vdsodata:vdso_bin. DO NOT EDIT. + +package vdsodata + +var Binary = []byte("ELF\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00>\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00X\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x008\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00p\xff\xff\xff\xff\xff\x00\x00p\xff\xff\xff\xff\xff\xae\x00\x00\x00\x00\x00\x00\xae\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0\x00\x00\x00\x00\x00\x00\xe0p\xff\xff\xff\xff\xff\xe0p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00T\x00\x00\x00\x00\x00\x00Tp\xff\xff\xff\xff\xffTp\xff\xff\xff\xff\xff`\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\xe5td\x00\x00\x00\xb4\x00\x00\x00\x00\x00\x00\xb4p\xff\xff\xff\xff\xff\xb4p\xff\xff\xff\xff\xffD\x00\x00\x00\x00\x00\x00\x00D\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\"\x00\x00 p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\"\x00\x00\x00\x00\x00 p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\"\x00\x00\xf0p\xff\xff\xff\xff\xff&\x00\x00\x00\x00\x00\x00\x005\x00\x00\x00\x00\x00\xf0p\xff\xff\xff\xff\xff&\x00\x00\x00\x00\x00\x00\x00A\x00\x00\x00\"\x00\x00\x80p\xff\xff\xff\xff\xffb\x00\x00\x00\x00\x00\x00\x00N\x00\x00\x00\x00\x00\x80p\xff\xff\xff\xff\xffb\x00\x00\x00\x00\x00\x00\x00b\x00\x00\x00\"\x00\x00@p\xff\xff\xff\xff\xff8\x00\x00\x00\x00\x00\x00\x00p\x00\x00\x00\x00\x00@p\xff\xff\xff\xff\xff8\x00\x00\x00\x00\x00\x00\x00\x85\x00\x00\x00\x00\x000p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00linux-vdso.so.1\x00LINUX_2.6\x00getcpu\x00__vdso_getcpu\x00time\x00__vdso_time\x00gettimeofday\x00__vdso_gettimeofday\x00clock_gettime\x00__vdso_clock_gettime\x00__kernel_rt_sigreturn\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa1\xbf\xee
\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf6u\xae\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00GNU\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00GNU\x00gold 1.16\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00GNU\x00g\xf3E\xee\xe6C\n\x9a<\x8b\xa5L\xddU\x9fN;@\x00\x00\x00\x00\x00\x00|\x00\x00\\\x00\x00\x00\x8c\x00\x00t\x00\x00\x00\xcc\x00\x00\x8c\x00\x00\x00<
\x00\x00\xb4\x00\x00\x00l
\x00\x00\xd4\x00\x00\x00|
\x00\x00\xec\x00\x00\x00<\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00zR\x00x\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x004\x00\x00\x00\x00\x008\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00L\x00\x00\x008\x00\x00b\x00\x00\x00\x00E\x86D\x83D0RAA\x00\x00\x00t\x00\x00\x00\x80\x00\x00&\x00\x00\x00\x00E\x83G XA\x00\x00\x00\x00\x94\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00\x88\x00\x00\xbb\x00\x00\x00\x00E\x83~\nmJ\x00\x00\x00\xcc\x00\x00\x00(
\x00\x00\xbe\x00\x00\x00\x00E\x83~\nmM\x00\x00\x00\x00\x00\x00\x00`p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00hp\xff\xff\xff\xff\xff\n\x00\x00\x00\x00\x00\x00\x00\x9b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\xff\xffo\x00\x00\x00\x00p\xff\xff\xff\xff\xff\xfc\xff\xffo\x00\x00\x00\x00p\xff\xff\xff\xff\xff\xfd\xff\xffo\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf3\xfa\xb8\x00\x00\x00\xc3@\x00\xf3\xfa\x83\xfft\x83\xfft\x85\xfft\xb8\xe4\x00\x00\x00\xc3fD\x00\x00H\x89\xf7\xe9\x88\x00\x00\x84\x00\x00\x00\x00\x00H\x89\xf7\xe9\xb8\x00\x00\x00\x84\x00\x00\x00\x00\x00\xf3\xfaUH\x89\xf5SH\x83\xecH\x85\xfft:H\x89\xfbH\x89\xe7\xe8\x93\x00\x00\x00\x85\xc0u:H\x8b$H\x8bL$H\xba\xcf\xf7S㥛\xc4 H\x89H\x89\xc8H\xc1\xf9?H\xf7\xeaH\xc1\xfaH)\xcaH\x89S1\xc0H\x85\xedtH\xc7E\x00\x00\x00\x00\x00H\x83\xc4[]\xc3ff.\x84\x00\x00\x00\x00\x00\x00\xf3\xfaSH\x89\xfbH\x83\xecH\x89\xe7\xe8,\x00\x00\x00H\x8b$H\x85\xdbtH\x89H\x83\xc4[\xc3f.\x84\x00\x00\x00\x00\x00\xf3\xfa\xb85\x00\x00H\x98Ð\x90\xf3\xfaSH\x89\xfeH\x8d
\xc1\xde\xff\xffH\x8b9\x83\xe7\xfeL\x8bQ(L\x8bA8H\x8bY0L\x8bY@Lc\xcf\xae\xe81H\x8b9L9\xcfu\xddM\x85\xd2tv\x89\xc0H\xc1\xe2 H \xc21\xc0H9\xd3+H\xb8\x00\x00\x00\x00\x00ʚ;H\x89\xd11\xd2I\xf7\xf3H)\xd9H\x89\xcfH\xc1\xff?H\xaf\xf8H\xf7\xe1H\xfaH\xac\xd0 H\xb9SZ\x9b\xa0/\xb8D\x00I\xc0[L\x89\xc2H\xc1\xea H\x89\xd0H\xf7\xe11\xc0H\xc1\xeaH\x89Hi\xd2\x00ʚ;I)\xd0L\x89F\xc3\x84\x00\x00\x00\x00\x00\xb8\xe4\x00\x00\x001\xff[\xc3D\x00\x00\xf3\xfaSH\x89\xfeH\x8d
\xde\xff\xffH\x8b9\x83\xe7\xfeL\x8bQL\x8bAH\x8bYL\x8bY Lc\xcf\xae\xe81H\x8b9L9\xcfu\xddM\x85\xd2tv\x89\xc0H\xc1\xe2 H \xc21\xc0H9\xd3+H\xb8\x00\x00\x00\x00\x00ʚ;H\x89\xd11\xd2I\xf7\xf3H)\xd9H\x89\xcfH\xc1\xff?H\xaf\xf8H\xf7\xe1H\xfaH\xac\xd0 H\xb9SZ\x9b\xa0/\xb8D\x00I\xc0[L\x89\xc2H\xc1\xea H\x89\xd0H\xf7\xe11\xc0H\xc1\xeaH\x89Hi\xd2\x00ʚ;I)\xd0L\x89F\xc3\x84\x00\x00\x00\x00\x00\xb8\xe4\x00\x00\x00\xbf\x00\x00\x00[\xc3\x00GCC: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000p\xff\xff\xff\xff\xff\xbb\x00\x00\x00\x00\x00\x00\x009\x00\x00\x00\x00\x00\xf0p\xff\xff\xff\xff\xff\xbe\x00\x00\x00\x00\x00\x00\x00]\x00\x00\x00 \x00\xe0p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00f\x00\x00\x00\x00\x00\xf1\xff\x00\x00p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00s\x00\x00\x00\x00\x00\xf1\xff\x00\xf0o\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00{\x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x85\x00\x00\x00\"\x00\x00 p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x8c\x00\x00\x00\x00\x00 p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x9a\x00\x00\x00\"\x00\x00\xf0p\xff\xff\xff\xff\xff&\x00\x00\x00\x00\x00\x00\x00\x9f\x00\x00\x00\x00\x00\xf0p\xff\xff\xff\xff\xff&\x00\x00\x00\x00\x00\x00\x00\xab\x00\x00\x00\"\x00\x00\x80p\xff\xff\xff\xff\xffb\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x80p\xff\xff\xff\xff\xffb\x00\x00\x00\x00\x00\x00\x00\xcc\x00\x00\x00\"\x00\x00@p\xff\xff\xff\xff\xff8\x00\x00\x00\x00\x00\x00\x00\xda\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00@p\xff\xff\xff\xff\xff8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00vdso.cc\x00vdso_time.cc\x00_ZN4vdso13ClockRealtimeEP8timespec\x00_ZN4vdso14ClockMonotonicEP8timespec\x00_DYNAMIC\x00VDSO_PRELINK\x00_params\x00LINUX_2.6\x00getcpu\x00__vdso_getcpu\x00time\x00__vdso_time\x00gettimeofday\x00__vdso_gettimeofday\x00clock_gettime\x00_GLOBAL_OFFSET_TABLE_\x00__vdso_clock_gettime\x00__kernel_rt_sigreturn\x00\x00.text\x00.comment\x00.bss\x00.dynstr\x00.eh_frame_hdr\x00.gnu.version\x00.dynsym\x00.hash\x00.note\x00.eh_frame\x00.gnu.version_d\x00.dynamic\x00.shstrtab\x00.strtab\x00.symtab\x00.data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 p\xff\xff\xff\xff\xff \x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x008\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00`p\xff\xff\xff\xff\xff`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00hp\xff\xff\xff\xff\xffh\x00\x00\x00\x00\x00\x00\x9b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00+\x00\x00\x00\xff\xff\xffo\x00\x00\x00\x00\x00\x00\x00p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00V\x00\x00\x00\xfd\xff\xffo\x00\x00\x00\x00\x00\x00\x00p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x008\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00F\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Tp\xff\xff\xff\xff\xffT\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00p\x00\x00\x00\x00\x00\x00\x00\xb4p\xff\xff\xff\xff\xff\xb4\x00\x00\x00\x00\x00\x00D\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00L\x00\x00\x00\x00\x00p\x00\x00\x00\x00\x00\x00\x00\xf8p\xff\xff\xff\xff\xff\xf8\x00\x00\x00\x00\x00\x00\xe8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0p\xff\xff\xff\xff\xff\xe0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x88\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0p\xff\xff\xff\xff\xff\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000p\xff\xff\xff\xff\xff0\x00\x00\x00\x00\x00\x00~\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaep\xff\xff\xff\xff\xff\xae\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\x00\x00\x00\x00\x00\x00+\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0\x00\x00\x00\x00\x00\x00\xc8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00x\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x8e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00") diff --git a/pkg/sentry/loader/vdsodata/vdso_arm64.go b/pkg/sentry/loader/vdsodata/vdso_arm64.go new file mode 100644 index 000000000..67de78969 --- /dev/null +++ b/pkg/sentry/loader/vdsodata/vdso_arm64.go @@ -0,0 +1,7 @@ +// +build arm64 + +// Generated by go_embed_data for //pkg/sentry/loader/vdsodata:vdso_bin. DO NOT EDIT. + +package vdsodata + +var Binary = []byte("ELF\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x008\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00p\xff\xff\xff\xff\xff\x00\x00p\xff\xff\xff\xff\xff\x84\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\x00\x00\x00\xd0p\xff\xff\xff\xff\xff\xd0p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x9c\x00\x00\x00\x00\x00\x00\x9cp\xff\xff\xff\xff\xff\x9cp\xff\xff\xff\xff\xff@\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\xe5td\x00\x00\x00\xdc\x00\x00\x00\x00\x00\x00\xdcp\xff\xff\xff\xff\xff\xdcp\xff\xff\xff\xff\xff<\x00\x00\x00\x00\x00\x00\x00<\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8p\xff\xff\xff\xff\xff<\x00\x00\x00\x00\x00\x00\x004\x00\x00\x00\x00\x00xp\xff\xff\xff\xff\xff`\x00\x00\x00\x00\x00\x00\x00J\x00\x00\x00\x00\x00@p\xff\xff\xff\xff\xff4\x00\x00\x00\x00\x00\x00\x00a\x00\x00\x00\x00\x000p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00linux-vdso.so.1\x00LINUX_2.6.39\x00__kernel_clock_getres\x00__kernel_gettimeofday\x00__kernel_clock_gettime\x00__kernel_rt_sigreturn\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa1\xbf\xee
\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x89\xcb_\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00GNU\x00gold 1.16\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00GNU\x004\x93\xb8\xcc$\xaaU\x92\xcd\\\xc0o\xbeb)*\xe1T\x90\x9f;8\x00\x00\x00\x00\x00\x00T
\x00\x00T\x00\x00\x00d
\x00\x00l\x00\x00\x00\x9c
\x00\x00\x84\x00\x00\x00\xfc
\x00\x00\xac\x00\x00\x00<\x00\x00\xc4\x00\x00\x00\xf4\x00\x00\xdc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00zR\x00x\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x004\x00\x00\x00\xf0\x00\x004\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00L\x00\x00\x00
\x00\x00`\x00\x00\x00\x00A0\x9d\x9eB\x93\x94T\xde\xdd\xd3\xd4\x00\x00\x00\x00\x00\x00\x00\x00t\x00\x00\x00H
\x00\x00<\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x8c\x00\x00\x00p
\x00\x00\xb4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00\x00\x00\xb4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Pp\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0p\xff\xff\xff\xff\xff\n\x00\x00\x00\x00\x00\x00\x00w\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\xff\xffo\x00\x00\x00\x00Xp\xff\xff\xff\xff\xff\xfc\xff\xffo\x00\x00\x00\x00dp\xff\xff\xff\xff\xff\xfd\xff\xffo\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00h\x80\xd2\x00\x00\xd4\xc0_\xd6 \xd5\xe3\xaa\x00q\xe0\x00\x00T\x00q\xa0\x00\x00T\xc0\x00\x004(\x80\xd2\x00\x00\xd4\xc0_\xd6\xe0\xaaZ\x00\x00\xe0\xaa*\x00\x00 \xd5\xfd{\xbd\xa9\xfd\x00\x91\xf3S\xa9\xf4\xaa\xc0\x00\xb4\xf3\x00\xaa\xe0\x83\x00\x91!\x00\x00\x94\xa0\x005\xe2B\xa9\xe0\xf9\x9e\xd2`j\xbc\xf2\xa0t\xd3\xf2\x80\xe4\xf2 |@\x9b\x00\xfcG\x93\x00\xfc\x81\xcbb\x00\xa9\x00\x00\x80RT\x00\x00\xb4\x9f\x00\xf9\xf3SA\xa9\xfd{è\xc0_\xd6\x00q\xac\x00\x00T\xc0\x00\xf86H\x80\xd2\x00\x00\xd4\xc0_\xd6\x00q\x81\xff\xffT\xa1\x00\x00\xb4\"\x00\x80\xd2\x00\x00\x80R?\x00\xa9\xc0_\xd6\x00\x00\x80R\xc0_\xd6\xd5 B\xf7\xfeC\x00@\xf9\xe1\x00\xaa\xbf9\xd5cxF\x9cB\xa9e|@\x93D\xa0C\xa9@\xe0;տ9\xd5C\x00@\xf9\x00\xeb\xe1\xfe\xffT\x86\x00\xb4\xff\x00\x00\xeb\x00\x80\xd2L\x00T@\xd9\xd2\x00\x00\xcbCs\xe7\xf2\xfc\x93cȚ|Û\xa2\x9b\x00|\x9bB\x80\xc0\x93\x84\x00\x8bbJ\x8b\xd2b\xb4\xf2@\x99҃\xfcI\xd3\xe2\xd7\xf2\x82\xe0\xf2Es\xa7\xf2\x00\x00\x80Rc|b\xfcK\xd3\"\x00\x00\xf9B\x90\x9b\"\x00\xf9\xc0_\xd6\x00\x00\x80R(\x80\xd2\x00\x00\xd4\xc0_\xd6 Ղ\xf1\xfeC\x00@\xf9\xe1\x00\xaa\xbf9\xd5cxF\x9c@\xa9e|@\x93D\xa0A\xa9@\xe0;տ9\xd5C\x00@\xf9\x00\xeb\xe1\xfe\xffT\x86\x00\xb4\xff\x00\x00\xeb\x00\x80\xd2L\x00T@\xd9\xd2\x00\x00\xcbCs\xe7\xf2\xfc\x93cȚ|Û\xa2\x9b\x00|\x9bB\x80\xc0\x93\x84\x00\x8bbJ\x8b\xd2b\xb4\xf2@\x99҃\xfcI\xd3\xe2\xd7\xf2\x82\xe0\xf2Es\xa7\xf2\x00\x00\x80Rc|b\xfcK\xd3\"\x00\x00\xf9B\x90\x9b\"\x00\xf9\xc0_\xd6 \x00\x80R(\x80\xd2\x00\x00\xd4\xc0_\xd6\x00GCC: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x000p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00p\xff\xff\xff\xff\xff\xb4\x00\x00\x00\x00\x00\x00\x00<\x00\x00\x00\x00\x00\xd0p\xff\xff\xff\xff\xff\xb4\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00 \x00\xd0p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00i\x00\x00\x00\x00\x00\xf1\xff\x00\x00p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00v\x00\x00\x00\x00\x00\xf1\xff\x00\xf0o\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00~\x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x8b\x00\x00\x00\x00\x00\xd8p\xff\xff\xff\xff\xff<\x00\x00\x00\x00\x00\x00\x00\xa1\x00\x00\x00\x00\x00xp\xff\xff\xff\xff\xff`\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00@p\xff\xff\xff\xff\xff4\x00\x00\x00\x00\x00\x00\x00\xce\x00\x00\x00\x00\x000p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00vdso.cc\x00$x\x00vdso_time.cc\x00_ZN4vdso13ClockRealtimeEP8timespec\x00_ZN4vdso14ClockMonotonicEP8timespec\x00_DYNAMIC\x00VDSO_PRELINK\x00_params\x00LINUX_2.6.39\x00__kernel_clock_getres\x00__kernel_gettimeofday\x00__kernel_clock_gettime\x00__kernel_rt_sigreturn\x00\x00.text\x00.comment\x00.bss\x00.dynstr\x00.eh_frame_hdr\x00.gnu.version\x00.dynsym\x00.hash\x00.note\x00.eh_frame\x00.gnu.version_d\x00.dynamic\x00.shstrtab\x00.strtab\x00.symtab\x00.data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 p\xff\xff\xff\xff\xff \x00\x00\x00\x00\x00\x00,\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x008\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Pp\xff\xff\xff\xff\xffP\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0p\xff\xff\xff\xff\xff\xe0\x00\x00\x00\x00\x00\x00w\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00+\x00\x00\x00\xff\xff\xffo\x00\x00\x00\x00\x00\x00\x00Xp\xff\xff\xff\xff\xffX\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00V\x00\x00\x00\xfd\xff\xffo\x00\x00\x00\x00\x00\x00\x00dp\xff\xff\xff\xff\xffd\x00\x00\x00\x00\x00\x008\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00F\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x9cp\xff\xff\xff\xff\xff\x9c\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdcp\xff\xff\xff\xff\xff\xdc\x00\x00\x00\x00\x00\x00<\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00L\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0p\xff\xff\xff\xff\xff\xd0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x88\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0p\xff\xff\xff\xff\xff\xe0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000p\xff\xff\xff\xff\xff0\x00\x00\x00\x00\x00\x00T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x84p\xff\xff\xff\xff\xff\x84\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00+\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\x00\x00\x00\x00\x00\x00h\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00x\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfc\x00\x00\x00\x00\x00\x00\x8e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00") diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD deleted file mode 100644 index c30e88725..000000000 --- a/pkg/sentry/memmap/BUILD +++ /dev/null @@ -1,68 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "mappable_range", - out = "mappable_range.go", - package = "memmap", - prefix = "Mappable", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - -go_template_instance( - name = "mapping_set_impl", - out = "mapping_set_impl.go", - package = "memmap", - prefix = "Mapping", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "MappableRange", - "Value": "MappingsOfRange", - "Functions": "mappingSetFunctions", - }, -) - -go_template_instance( - name = "file_range", - out = "file_range.go", - package = "memmap", - prefix = "File", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - -go_library( - name = "memmap", - srcs = [ - "file_range.go", - "mappable_range.go", - "mapping_set.go", - "mapping_set_impl.go", - "memmap.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/hostarch", - "//pkg/log", - "//pkg/safemem", - "//pkg/syserror", - "//pkg/usermem", - ], -) - -go_test( - name = "memmap_test", - size = "small", - srcs = ["mapping_set_test.go"], - library = ":memmap", - deps = ["//pkg/hostarch"], -) diff --git a/pkg/sentry/memmap/file_range.go b/pkg/sentry/memmap/file_range.go new file mode 100644 index 000000000..6f0b4bde4 --- /dev/null +++ b/pkg/sentry/memmap/file_range.go @@ -0,0 +1,76 @@ +package memmap + +// A Range represents a contiguous range of T. +// +// +stateify savable +type FileRange struct { + // Start is the inclusive start of the range. + Start uint64 + + // End is the exclusive end of the range. + End uint64 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +// +//go:nosplit +func (r FileRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +// +//go:nosplit +func (r FileRange) Length() uint64 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +// +//go:nosplit +func (r FileRange) Contains(x uint64) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +// +//go:nosplit +func (r FileRange) Overlaps(r2 FileRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +// +//go:nosplit +func (r FileRange) IsSupersetOf(r2 FileRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +// +//go:nosplit +func (r FileRange) Intersect(r2 FileRange) FileRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +// +//go:nosplit +func (r FileRange) CanSplitAt(x uint64) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/memmap/mappable_range.go b/pkg/sentry/memmap/mappable_range.go new file mode 100644 index 000000000..7b7312cb6 --- /dev/null +++ b/pkg/sentry/memmap/mappable_range.go @@ -0,0 +1,76 @@ +package memmap + +// A Range represents a contiguous range of T. +// +// +stateify savable +type MappableRange struct { + // Start is the inclusive start of the range. + Start uint64 + + // End is the exclusive end of the range. + End uint64 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +// +//go:nosplit +func (r MappableRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +// +//go:nosplit +func (r MappableRange) Length() uint64 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +// +//go:nosplit +func (r MappableRange) Contains(x uint64) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +// +//go:nosplit +func (r MappableRange) Overlaps(r2 MappableRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +// +//go:nosplit +func (r MappableRange) IsSupersetOf(r2 MappableRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +// +//go:nosplit +func (r MappableRange) Intersect(r2 MappableRange) MappableRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +// +//go:nosplit +func (r MappableRange) CanSplitAt(x uint64) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/memmap/mapping_set_impl.go b/pkg/sentry/memmap/mapping_set_impl.go new file mode 100644 index 000000000..c32df9259 --- /dev/null +++ b/pkg/sentry/memmap/mapping_set_impl.go @@ -0,0 +1,1643 @@ +package memmap + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const MappingtrackGaps = 0 + +var _ = uint8(MappingtrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type MappingdynamicGap [MappingtrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *MappingdynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *MappingdynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + MappingminDegree = 3 + + MappingmaxDegree = 2 * MappingminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type MappingSet struct { + root Mappingnode `state:".(*MappingSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *MappingSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *MappingSet) IsEmptyRange(r MappableRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *MappingSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *MappingSet) SpanRange(r MappableRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *MappingSet) FirstSegment() MappingIterator { + if s.root.nrSegments == 0 { + return MappingIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *MappingSet) LastSegment() MappingIterator { + if s.root.nrSegments == 0 { + return MappingIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *MappingSet) FirstGap() MappingGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return MappingGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *MappingSet) LastGap() MappingGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return MappingGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *MappingSet) Find(key uint64) (MappingIterator, MappingGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return MappingIterator{n, i}, MappingGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return MappingIterator{}, MappingGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *MappingSet) FindSegment(key uint64) MappingIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *MappingSet) LowerBoundSegment(min uint64) MappingIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *MappingSet) UpperBoundSegment(max uint64) MappingIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *MappingSet) FindGap(key uint64) MappingGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *MappingSet) LowerBoundGap(min uint64) MappingGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *MappingSet) UpperBoundGap(max uint64) MappingGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *MappingSet) Add(r MappableRange, val MappingsOfRange) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *MappingSet) AddWithoutMerging(r MappableRange, val MappingsOfRange) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *MappingSet) Insert(gap MappingGapIterator, r MappableRange, val MappingsOfRange) MappingIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (mappingSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := MappingtrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (mappingSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (mappingSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := MappingtrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *MappingSet) InsertWithoutMerging(gap MappingGapIterator, r MappableRange, val MappingsOfRange) MappingIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *MappingSet) InsertWithoutMergingUnchecked(gap MappingGapIterator, r MappableRange, val MappingsOfRange) MappingIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := MappingtrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return MappingIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *MappingSet) Remove(seg MappingIterator) MappingGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if MappingtrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + mappingSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if MappingtrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(MappingGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *MappingSet) RemoveAll() { + s.root = Mappingnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *MappingSet) RemoveRange(r MappableRange) MappingGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *MappingSet) Merge(first, second MappingIterator) MappingIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *MappingSet) MergeUnchecked(first, second MappingIterator) MappingIterator { + if first.End() == second.Start() { + if mval, ok := (mappingSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return MappingIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *MappingSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *MappingSet) MergeRange(r MappableRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *MappingSet) MergeAdjacent(r MappableRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *MappingSet) Split(seg MappingIterator, split uint64) (MappingIterator, MappingIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *MappingSet) SplitUnchecked(seg MappingIterator, split uint64) (MappingIterator, MappingIterator) { + val1, val2 := (mappingSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), MappableRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *MappingSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *MappingSet) Isolate(seg MappingIterator, r MappableRange) MappingIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *MappingSet) ApplyContiguous(r MappableRange, fn func(seg MappingIterator)) MappingGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return MappingGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return MappingGapIterator{} + } + } +} + +// +stateify savable +type Mappingnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *Mappingnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap MappingdynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [MappingmaxDegree - 1]MappableRange + values [MappingmaxDegree - 1]MappingsOfRange + children [MappingmaxDegree]*Mappingnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Mappingnode) firstSegment() MappingIterator { + for n.hasChildren { + n = n.children[0] + } + return MappingIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Mappingnode) lastSegment() MappingIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return MappingIterator{n, n.nrSegments - 1} +} + +func (n *Mappingnode) prevSibling() *Mappingnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *Mappingnode) nextSibling() *Mappingnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *Mappingnode) rebalanceBeforeInsert(gap MappingGapIterator) MappingGapIterator { + if n.nrSegments < MappingmaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &Mappingnode{ + nrSegments: MappingminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &Mappingnode{ + nrSegments: MappingminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:MappingminDegree-1], n.keys[:MappingminDegree-1]) + copy(left.values[:MappingminDegree-1], n.values[:MappingminDegree-1]) + copy(right.keys[:MappingminDegree-1], n.keys[MappingminDegree:]) + copy(right.values[:MappingminDegree-1], n.values[MappingminDegree:]) + n.keys[0], n.values[0] = n.keys[MappingminDegree-1], n.values[MappingminDegree-1] + MappingzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:MappingminDegree], n.children[:MappingminDegree]) + copy(right.children[:MappingminDegree], n.children[MappingminDegree:]) + MappingzeroNodeSlice(n.children[2:]) + for i := 0; i < MappingminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if MappingtrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < MappingminDegree { + return MappingGapIterator{left, gap.index} + } + return MappingGapIterator{right, gap.index - MappingminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[MappingminDegree-1], n.values[MappingminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &Mappingnode{ + nrSegments: MappingminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:MappingminDegree-1], n.keys[MappingminDegree:]) + copy(sibling.values[:MappingminDegree-1], n.values[MappingminDegree:]) + MappingzeroValueSlice(n.values[MappingminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:MappingminDegree], n.children[MappingminDegree:]) + MappingzeroNodeSlice(n.children[MappingminDegree:]) + for i := 0; i < MappingminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = MappingminDegree - 1 + + if MappingtrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < MappingminDegree { + return gap + } + return MappingGapIterator{sibling, gap.index - MappingminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *Mappingnode) rebalanceAfterRemove(gap MappingGapIterator) MappingGapIterator { + for { + if n.nrSegments >= MappingminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= MappingminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + mappingSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if MappingtrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return MappingGapIterator{n, 0} + } + if gap.node == n { + return MappingGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= MappingminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + mappingSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if MappingtrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return MappingGapIterator{n, n.nrSegments} + } + return MappingGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return MappingGapIterator{p, gap.index} + } + if gap.node == right { + return MappingGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *Mappingnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = MappingGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + mappingSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if MappingtrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *Mappingnode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *Mappingnode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *Mappingnode) calculateMaxGapLeaf() uint64 { + max := MappingGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (MappingGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *Mappingnode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *Mappingnode) searchFirstLargeEnoughGap(minSize uint64) MappingGapIterator { + if n.maxGap.Get() < minSize { + return MappingGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := MappingGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *Mappingnode) searchLastLargeEnoughGap(minSize uint64) MappingGapIterator { + if n.maxGap.Get() < minSize { + return MappingGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := MappingGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type MappingIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *Mappingnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg MappingIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg MappingIterator) Range() MappableRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg MappingIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg MappingIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg MappingIterator) SetRangeUnchecked(r MappableRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg MappingIterator) SetRange(r MappableRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg MappingIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg MappingIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg MappingIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg MappingIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg MappingIterator) Value() MappingsOfRange { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg MappingIterator) ValuePtr() *MappingsOfRange { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg MappingIterator) SetValue(val MappingsOfRange) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg MappingIterator) PrevSegment() MappingIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return MappingIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return MappingIterator{} + } + return MappingsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg MappingIterator) NextSegment() MappingIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return MappingIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return MappingIterator{} + } + return MappingsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg MappingIterator) PrevGap() MappingGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return MappingGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg MappingIterator) NextGap() MappingGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return MappingGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg MappingIterator) PrevNonEmpty() (MappingIterator, MappingGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return MappingIterator{}, gap + } + return gap.PrevSegment(), MappingGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg MappingIterator) NextNonEmpty() (MappingIterator, MappingGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return MappingIterator{}, gap + } + return gap.NextSegment(), MappingGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type MappingGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *Mappingnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap MappingGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap MappingGapIterator) Range() MappableRange { + return MappableRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap MappingGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return mappingSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap MappingGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return mappingSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap MappingGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap MappingGapIterator) PrevSegment() MappingIterator { + return MappingsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap MappingGapIterator) NextSegment() MappingIterator { + return MappingsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap MappingGapIterator) PrevGap() MappingGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return MappingGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap MappingGapIterator) NextGap() MappingGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return MappingGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap MappingGapIterator) NextLargeEnoughGap(minSize uint64) MappingGapIterator { + if MappingtrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap MappingGapIterator) nextLargeEnoughGapHelper(minSize uint64) MappingGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return MappingGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap MappingGapIterator) PrevLargeEnoughGap(minSize uint64) MappingGapIterator { + if MappingtrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap MappingGapIterator) prevLargeEnoughGapHelper(minSize uint64) MappingGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return MappingGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func MappingsegmentBeforePosition(n *Mappingnode, i int) MappingIterator { + for i == 0 { + if n.parent == nil { + return MappingIterator{} + } + n, i = n.parent, n.parentIndex + } + return MappingIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func MappingsegmentAfterPosition(n *Mappingnode, i int) MappingIterator { + for i == n.nrSegments { + if n.parent == nil { + return MappingIterator{} + } + n, i = n.parent, n.parentIndex + } + return MappingIterator{n, i} +} + +func MappingzeroValueSlice(slice []MappingsOfRange) { + + for i := range slice { + mappingSetFunctions{}.ClearValue(&slice[i]) + } +} + +func MappingzeroNodeSlice(slice []*Mappingnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *MappingSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *Mappingnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *Mappingnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if MappingtrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type MappingSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []MappingsOfRange +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *MappingSet) ExportSortedSlices() *MappingSegmentDataSlices { + var sds MappingSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *MappingSet) ImportSortedSlices(sds *MappingSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := MappableRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *MappingSet) segmentTestCheck(expectedSegments int, segFunc func(int, MappableRange, MappingsOfRange) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *MappingSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *MappingSet) saveRoot() *MappingSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *MappingSet) loadRoot(sds *MappingSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/memmap/mapping_set_test.go b/pkg/sentry/memmap/mapping_set_test.go deleted file mode 100644 index 5cb81fde7..000000000 --- a/pkg/sentry/memmap/mapping_set_test.go +++ /dev/null @@ -1,259 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memmap - -import ( - "gvisor.dev/gvisor/pkg/hostarch" - "reflect" - "testing" -) - -type testMappingSpace struct { - // Ideally we'd store the full ranges that were invalidated, rather - // than individual calls to Invalidate, as they are an implementation - // detail, but this is the simplest way for now. - inv []hostarch.AddrRange -} - -func (n *testMappingSpace) reset() { - n.inv = []hostarch.AddrRange{} -} - -func (n *testMappingSpace) Invalidate(ar hostarch.AddrRange, opts InvalidateOpts) { - n.inv = append(n.inv, ar) -} - -func TestAddRemoveMapping(t *testing.T) { - set := MappingSet{} - ms := &testMappingSpace{} - - mapped := set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, true) - if got, want := mapped, []MappableRange{{0x1000, 0x3000}}; !reflect.DeepEqual(got, want) { - t.Errorf("AddMapping: got %+v, wanted %+v", got, want) - } - - // Mappings (hostarch.AddrRanges => memmap.MappableRange): - // [0x10000, 0x12000) => [0x1000, 0x3000) - t.Log(&set) - - mapped = set.AddMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true) - if len(mapped) != 0 { - t.Errorf("AddMapping: got %+v, wanted []", mapped) - } - - // Mappings: - // [0x10000, 0x11000) => [0x1000, 0x2000) - // [0x11000, 0x12000) and [0x20000, 0x21000) => [0x2000, 0x3000) - t.Log(&set) - - mapped = set.AddMapping(ms, hostarch.AddrRange{0x30000, 0x31000}, 0x4000, true) - if got, want := mapped, []MappableRange{{0x4000, 0x5000}}; !reflect.DeepEqual(got, want) { - t.Errorf("AddMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x10000, 0x11000) => [0x1000, 0x2000) - // [0x11000, 0x12000) and [0x20000, 0x21000) => [0x2000, 0x3000) - // [0x30000, 0x31000) => [0x4000, 0x5000) - t.Log(&set) - - mapped = set.AddMapping(ms, hostarch.AddrRange{0x12000, 0x15000}, 0x3000, true) - if got, want := mapped, []MappableRange{{0x3000, 0x4000}, {0x5000, 0x6000}}; !reflect.DeepEqual(got, want) { - t.Errorf("AddMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x10000, 0x11000) => [0x1000, 0x2000) - // [0x11000, 0x12000) and [0x20000, 0x21000) => [0x2000, 0x3000) - // [0x12000, 0x13000) => [0x3000, 0x4000) - // [0x13000, 0x14000) and [0x30000, 0x31000) => [0x4000, 0x5000) - // [0x14000, 0x15000) => [0x5000, 0x6000) - t.Log(&set) - - unmapped := set.RemoveMapping(ms, hostarch.AddrRange{0x10000, 0x11000}, 0x1000, true) - if got, want := unmapped, []MappableRange{{0x1000, 0x2000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x11000, 0x12000) and [0x20000, 0x21000) => [0x2000, 0x3000) - // [0x12000, 0x13000) => [0x3000, 0x4000) - // [0x13000, 0x14000) and [0x30000, 0x31000) => [0x4000, 0x5000) - // [0x14000, 0x15000) => [0x5000, 0x6000) - t.Log(&set) - - unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true) - if len(unmapped) != 0 { - t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) - } - - // Mappings: - // [0x11000, 0x13000) => [0x2000, 0x4000) - // [0x13000, 0x14000) and [0x30000, 0x31000) => [0x4000, 0x5000) - // [0x14000, 0x15000) => [0x5000, 0x6000) - t.Log(&set) - - unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x11000, 0x15000}, 0x2000, true) - if got, want := unmapped, []MappableRange{{0x2000, 0x4000}, {0x5000, 0x6000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x30000, 0x31000) => [0x4000, 0x5000) - t.Log(&set) - - unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x30000, 0x31000}, 0x4000, true) - if got, want := unmapped, []MappableRange{{0x4000, 0x5000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } -} - -func TestInvalidateWholeMapping(t *testing.T) { - set := MappingSet{} - ms := &testMappingSpace{} - - set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x11000}, 0, true) - // Mappings: - // [0x10000, 0x11000) => [0, 0x1000) - t.Log(&set) - set.Invalidate(MappableRange{0, 0x1000}, InvalidateOpts{}) - if got, want := ms.inv, []hostarch.AddrRange{{0x10000, 0x11000}}; !reflect.DeepEqual(got, want) { - t.Errorf("Invalidate: got %+v, wanted %+v", got, want) - } -} - -func TestInvalidatePartialMapping(t *testing.T) { - set := MappingSet{} - ms := &testMappingSpace{} - - set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x13000}, 0, true) - // Mappings: - // [0x10000, 0x13000) => [0, 0x3000) - t.Log(&set) - set.Invalidate(MappableRange{0x1000, 0x2000}, InvalidateOpts{}) - if got, want := ms.inv, []hostarch.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) { - t.Errorf("Invalidate: got %+v, wanted %+v", got, want) - } -} - -func TestInvalidateMultipleMappings(t *testing.T) { - set := MappingSet{} - ms := &testMappingSpace{} - - set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x11000}, 0, true) - set.AddMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true) - // Mappings: - // [0x10000, 0x11000) => [0, 0x1000) - // [0x12000, 0x13000) => [0x2000, 0x3000) - t.Log(&set) - set.Invalidate(MappableRange{0, 0x3000}, InvalidateOpts{}) - if got, want := ms.inv, []hostarch.AddrRange{{0x10000, 0x11000}, {0x20000, 0x21000}}; !reflect.DeepEqual(got, want) { - t.Errorf("Invalidate: got %+v, wanted %+v", got, want) - } -} - -func TestInvalidateOverlappingMappings(t *testing.T) { - set := MappingSet{} - ms1 := &testMappingSpace{} - ms2 := &testMappingSpace{} - - set.AddMapping(ms1, hostarch.AddrRange{0x10000, 0x12000}, 0, true) - set.AddMapping(ms2, hostarch.AddrRange{0x20000, 0x22000}, 0x1000, true) - // Mappings: - // ms1:[0x10000, 0x12000) => [0, 0x2000) - // ms2:[0x11000, 0x13000) => [0x1000, 0x3000) - t.Log(&set) - set.Invalidate(MappableRange{0x1000, 0x2000}, InvalidateOpts{}) - if got, want := ms1.inv, []hostarch.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) { - t.Errorf("Invalidate: ms1: got %+v, wanted %+v", got, want) - } - if got, want := ms2.inv, []hostarch.AddrRange{{0x20000, 0x21000}}; !reflect.DeepEqual(got, want) { - t.Errorf("Invalidate: ms1: got %+v, wanted %+v", got, want) - } -} - -func TestMixedWritableMappings(t *testing.T) { - set := MappingSet{} - ms := &testMappingSpace{} - - mapped := set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, true) - if got, want := mapped, []MappableRange{{0x1000, 0x3000}}; !reflect.DeepEqual(got, want) { - t.Errorf("AddMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x10000, 0x12000) writable => [0x1000, 0x3000) - t.Log(&set) - - mapped = set.AddMapping(ms, hostarch.AddrRange{0x20000, 0x22000}, 0x2000, false) - if got, want := mapped, []MappableRange{{0x3000, 0x4000}}; !reflect.DeepEqual(got, want) { - t.Errorf("AddMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x10000, 0x11000) writable => [0x1000, 0x2000) - // [0x11000, 0x12000) writable and [0x20000, 0x21000) readonly => [0x2000, 0x3000) - // [0x21000, 0x22000) readonly => [0x3000, 0x4000) - t.Log(&set) - - // Unmap should fail because we specified the readonly map address range, but - // asked to unmap a writable segment. - unmapped := set.RemoveMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true) - if len(unmapped) != 0 { - t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) - } - - // Readonly mapping removed, but writable mapping still exists in the range, - // so no mappable range fully unmapped. - unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, false) - if len(unmapped) != 0 { - t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) - } - - // Mappings: - // [0x10000, 0x12000) writable => [0x1000, 0x3000) - // [0x21000, 0x22000) readonly => [0x3000, 0x4000) - t.Log(&set) - - unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x11000, 0x12000}, 0x2000, true) - if got, want := unmapped, []MappableRange{{0x2000, 0x3000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x10000, 0x12000) writable => [0x1000, 0x3000) - // [0x21000, 0x22000) readonly => [0x3000, 0x4000) - t.Log(&set) - - // Unmap should fail since writable bit doesn't match. - unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, false) - if len(unmapped) != 0 { - t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) - } - - unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, true) - if got, want := unmapped, []MappableRange{{0x1000, 0x2000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x21000, 0x22000) readonly => [0x3000, 0x4000) - t.Log(&set) - - unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x21000, 0x22000}, 0x3000, false) - if got, want := unmapped, []MappableRange{{0x3000, 0x4000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } -} diff --git a/pkg/sentry/memmap/memmap_impl_state_autogen.go b/pkg/sentry/memmap/memmap_impl_state_autogen.go new file mode 100644 index 000000000..d65558bbe --- /dev/null +++ b/pkg/sentry/memmap/memmap_impl_state_autogen.go @@ -0,0 +1,116 @@ +// automatically generated by stateify. + +package memmap + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *MappingSet) StateTypeName() string { + return "pkg/sentry/memmap.MappingSet" +} + +func (s *MappingSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *MappingSet) beforeSave() {} + +// +checklocksignore +func (s *MappingSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *MappingSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *MappingSet) afterLoad() {} + +// +checklocksignore +func (s *MappingSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*MappingSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*MappingSegmentDataSlices)) }) +} + +func (n *Mappingnode) StateTypeName() string { + return "pkg/sentry/memmap.Mappingnode" +} + +func (n *Mappingnode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *Mappingnode) beforeSave() {} + +// +checklocksignore +func (n *Mappingnode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *Mappingnode) afterLoad() {} + +// +checklocksignore +func (n *Mappingnode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (m *MappingSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/memmap.MappingSegmentDataSlices" +} + +func (m *MappingSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (m *MappingSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (m *MappingSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.Start) + stateSinkObject.Save(1, &m.End) + stateSinkObject.Save(2, &m.Values) +} + +func (m *MappingSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (m *MappingSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.Start) + stateSourceObject.Load(1, &m.End) + stateSourceObject.Load(2, &m.Values) +} + +func init() { + state.Register((*MappingSet)(nil)) + state.Register((*Mappingnode)(nil)) + state.Register((*MappingSegmentDataSlices)(nil)) +} diff --git a/pkg/sentry/memmap/memmap_state_autogen.go b/pkg/sentry/memmap/memmap_state_autogen.go new file mode 100644 index 000000000..8624ecb60 --- /dev/null +++ b/pkg/sentry/memmap/memmap_state_autogen.go @@ -0,0 +1,100 @@ +// automatically generated by stateify. + +package memmap + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (fr *FileRange) StateTypeName() string { + return "pkg/sentry/memmap.FileRange" +} + +func (fr *FileRange) StateFields() []string { + return []string{ + "Start", + "End", + } +} + +func (fr *FileRange) beforeSave() {} + +// +checklocksignore +func (fr *FileRange) StateSave(stateSinkObject state.Sink) { + fr.beforeSave() + stateSinkObject.Save(0, &fr.Start) + stateSinkObject.Save(1, &fr.End) +} + +func (fr *FileRange) afterLoad() {} + +// +checklocksignore +func (fr *FileRange) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fr.Start) + stateSourceObject.Load(1, &fr.End) +} + +func (mr *MappableRange) StateTypeName() string { + return "pkg/sentry/memmap.MappableRange" +} + +func (mr *MappableRange) StateFields() []string { + return []string{ + "Start", + "End", + } +} + +func (mr *MappableRange) beforeSave() {} + +// +checklocksignore +func (mr *MappableRange) StateSave(stateSinkObject state.Sink) { + mr.beforeSave() + stateSinkObject.Save(0, &mr.Start) + stateSinkObject.Save(1, &mr.End) +} + +func (mr *MappableRange) afterLoad() {} + +// +checklocksignore +func (mr *MappableRange) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mr.Start) + stateSourceObject.Load(1, &mr.End) +} + +func (r *MappingOfRange) StateTypeName() string { + return "pkg/sentry/memmap.MappingOfRange" +} + +func (r *MappingOfRange) StateFields() []string { + return []string{ + "MappingSpace", + "AddrRange", + "Writable", + } +} + +func (r *MappingOfRange) beforeSave() {} + +// +checklocksignore +func (r *MappingOfRange) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.MappingSpace) + stateSinkObject.Save(1, &r.AddrRange) + stateSinkObject.Save(2, &r.Writable) +} + +func (r *MappingOfRange) afterLoad() {} + +// +checklocksignore +func (r *MappingOfRange) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.MappingSpace) + stateSourceObject.Load(1, &r.AddrRange) + stateSourceObject.Load(2, &r.Writable) +} + +func init() { + state.Register((*FileRange)(nil)) + state.Register((*MappableRange)(nil)) + state.Register((*MappingOfRange)(nil)) +} diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD deleted file mode 100644 index 69aff21b6..000000000 --- a/pkg/sentry/mm/BUILD +++ /dev/null @@ -1,170 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "file_refcount_set", - out = "file_refcount_set.go", - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - }, - package = "mm", - prefix = "fileRefcount", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.FileRange", - "Value": "int32", - "Functions": "fileRefcountSetFunctions", - }, -) - -go_template_instance( - name = "vma_set", - out = "vma_set.go", - consts = { - "minDegree": "8", - "trackGaps": "1", - }, - imports = { - "hostarch": "gvisor.dev/gvisor/pkg/hostarch", - }, - package = "mm", - prefix = "vma", - template = "//pkg/segment:generic_set", - types = { - "Key": "hostarch.Addr", - "Range": "hostarch.AddrRange", - "Value": "vma", - "Functions": "vmaSetFunctions", - }, -) - -go_template_instance( - name = "pma_set", - out = "pma_set.go", - consts = { - "minDegree": "8", - }, - imports = { - "hostarch": "gvisor.dev/gvisor/pkg/hostarch", - }, - package = "mm", - prefix = "pma", - template = "//pkg/segment:generic_set", - types = { - "Key": "hostarch.Addr", - "Range": "hostarch.AddrRange", - "Value": "pma", - "Functions": "pmaSetFunctions", - }, -) - -go_template_instance( - name = "io_list", - out = "io_list.go", - package = "mm", - prefix = "io", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*ioResult", - "Linker": "*ioResult", - }, -) - -go_template_instance( - name = "aio_mappable_refs", - out = "aio_mappable_refs.go", - package = "mm", - prefix = "aioMappable", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "aioMappable", - }, -) - -go_template_instance( - name = "special_mappable_refs", - out = "special_mappable_refs.go", - package = "mm", - prefix = "SpecialMappable", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "SpecialMappable", - }, -) - -go_library( - name = "mm", - srcs = [ - "address_space.go", - "aio_context.go", - "aio_context_state.go", - "aio_mappable_refs.go", - "debug.go", - "file_refcount_set.go", - "io.go", - "io_list.go", - "lifecycle.go", - "metadata.go", - "mm.go", - "pma.go", - "pma_set.go", - "procfs.go", - "save_restore.go", - "shm.go", - "special_mappable.go", - "special_mappable_refs.go", - "syscalls.go", - "vma.go", - "vma_set.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/atomicbitops", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safecopy", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/fs/proc/seqfile", - "//pkg/sentry/fsbridge", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/futex", - "//pkg/sentry/kernel/shm", - "//pkg/sentry/limits", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/usage", - "//pkg/sync", - "//pkg/syserror", - "//pkg/tcpip/buffer", - "//pkg/usermem", - ], -) - -go_test( - name = "mm_test", - size = "small", - srcs = ["mm_test.go"], - library = ":mm", - deps = [ - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/sentry/arch", - "//pkg/sentry/contexttest", - "//pkg/sentry/limits", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/mm/README.md b/pkg/sentry/mm/README.md deleted file mode 100644 index f4d43d927..000000000 --- a/pkg/sentry/mm/README.md +++ /dev/null @@ -1,280 +0,0 @@ -This package provides an emulation of Linux semantics for application virtual -memory mappings. - -For completeness, this document also describes aspects of the memory management -subsystem defined outside this package. - -# Background - -We begin by describing semantics for virtual memory in Linux. - -A virtual address space is defined as a collection of mappings from virtual -addresses to physical memory. However, userspace applications do not configure -mappings to physical memory directly. Instead, applications configure memory -mappings from virtual addresses to offsets into a file using the `mmap` system -call.[^mmap-anon] For example, a call to: - - mmap( - /* addr = */ 0x400000, - /* length = */ 0x1000, - PROT_READ | PROT_WRITE, - MAP_SHARED, - /* fd = */ 3, - /* offset = */ 0); - -creates a mapping of length 0x1000 bytes, starting at virtual address (VA) -0x400000, to offset 0 in the file represented by file descriptor (FD) 3. Within -the Linux kernel, virtual memory mappings are represented by *virtual memory -areas* (VMAs). Supposing that FD 3 represents file /tmp/foo, the state of the -virtual memory subsystem after the `mmap` call may be depicted as: - - VMA: VA:0x400000 -> /tmp/foo:0x0 - -Establishing a virtual memory area does not necessarily establish a mapping to a -physical address, because Linux has not necessarily provisioned physical memory -to store the file's contents. Thus, if the application attempts to read the -contents of VA 0x400000, it may incur a *page fault*, a CPU exception that -forces the kernel to create such a mapping to service the read. - -For a file, doing so consists of several logical phases: - -1. The kernel allocates physical memory to store the contents of the required - part of the file, and copies file contents to the allocated memory. - Supposing that the kernel chooses the physical memory at physical address - (PA) 0x2fb000, the resulting state of the system is: - - VMA: VA:0x400000 -> /tmp/foo:0x0 - Filemap: /tmp/foo:0x0 -> PA:0x2fb000 - - (In Linux the state of the mapping from file offset to physical memory is - stored in `struct address_space`, but to avoid confusion with other notions - of address space we will refer to this system as filemap, named after Linux - kernel source file `mm/filemap.c`.) - -2. The kernel stores the effective mapping from virtual to physical address in - a *page table entry* (PTE) in the application's *page tables*, which are - used by the CPU's virtual memory hardware to perform address translation. - The resulting state of the system is: - - VMA: VA:0x400000 -> /tmp/foo:0x0 - Filemap: /tmp/foo:0x0 -> PA:0x2fb000 - PTE: VA:0x400000 -----------------> PA:0x2fb000 - - The PTE is required for the application to actually use the contents of the - mapped file as virtual memory. However, the PTE is derived from the VMA and - filemap state, both of which are independently mutable, such that mutations - to either will affect the PTE. For example: - - - The application may remove the VMA using the `munmap` system call. This - breaks the mapping from VA:0x400000 to /tmp/foo:0x0, and consequently - the mapping from VA:0x400000 to PA:0x2fb000. However, it does not - necessarily break the mapping from /tmp/foo:0x0 to PA:0x2fb000, so a - future mapping of the same file offset may reuse this physical memory. - - - The application may invalidate the file's contents by passing a length - of 0 to the `ftruncate` system call. This breaks the mapping from - /tmp/foo:0x0 to PA:0x2fb000, and consequently the mapping from - VA:0x400000 to PA:0x2fb000. However, it does not break the mapping from - VA:0x400000 to /tmp/foo:0x0, so future changes to the file's contents - may again be made visible at VA:0x400000 after another page fault - results in the allocation of a new physical address. - - Note that, in order to correctly break the mapping from VA:0x400000 to - PA:0x2fb000 in the latter case, filemap must also store a *reverse mapping* - from /tmp/foo:0x0 to VA:0x400000 so that it can locate and remove the PTE. - -[^mmap-anon]: Memory mappings to non-files are discussed in later sections. - -## Private Mappings - -The preceding example considered VMAs created using the `MAP_SHARED` flag, which -means that PTEs derived from the mapping should always use physical memory that -represents the current state of the mapped file.[^mmap-dev-zero] Applications -can alternatively pass the `MAP_PRIVATE` flag to create a *private mapping*. -Private mappings are *copy-on-write*. - -Suppose that the application instead created a private mapping in the previous -example. In Linux, the state of the system after a read page fault would be: - - VMA: VA:0x400000 -> /tmp/foo:0x0 (private) - Filemap: /tmp/foo:0x0 -> PA:0x2fb000 - PTE: VA:0x400000 -----------------> PA:0x2fb000 (read-only) - -Now suppose the application attempts to write to VA:0x400000. For a shared -mapping, the write would be propagated to PA:0x2fb000, and the kernel would be -responsible for ensuring that the write is later propagated to the mapped file. -For a private mapping, the write incurs another page fault since the PTE is -marked read-only. In response, the kernel allocates physical memory to store the -mapping's *private copy* of the file's contents, copies file contents to the -allocated memory, and changes the PTE to map to the private copy. Supposing that -the kernel chooses the physical memory at physical address (PA) 0x5ea000, the -resulting state of the system is: - - VMA: VA:0x400000 -> /tmp/foo:0x0 (private) - Filemap: /tmp/foo:0x0 -> PA:0x2fb000 - PTE: VA:0x400000 -----------------> PA:0x5ea000 - -Note that the filemap mapping from /tmp/foo:0x0 to PA:0x2fb000 may still exist, -but is now irrelevant to this mapping. - -[^mmap-dev-zero]: Modulo files with special mmap semantics such as `/dev/zero`. - -## Anonymous Mappings - -Instead of passing a file to the `mmap` system call, applications can instead -request an *anonymous* mapping by passing the `MAP_ANONYMOUS` flag. -Semantically, an anonymous mapping is essentially a mapping to an ephemeral file -initially filled with zero bytes. Practically speaking, this is how shared -anonymous mappings are implemented, but private anonymous mappings do not result -in the creation of an ephemeral file; since there would be no way to modify the -contents of the underlying file through a private mapping, all private anonymous -mappings use a single shared page filled with zero bytes until copy-on-write -occurs. - -# Virtual Memory in the Sentry - -The sentry implements application virtual memory atop a host kernel, introducing -an additional level of indirection to the above. - -Consider the same scenario as in the previous section. Since the sentry handles -application system calls, the effect of an application `mmap` system call is to -create a VMA in the sentry (as opposed to the host kernel): - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 - -When the application first incurs a page fault on this address, the host kernel -delivers information about the page fault to the sentry in a platform-dependent -manner, and the sentry handles the fault: - -1. The sentry allocates memory to store the contents of the required part of - the file, and copies file contents to the allocated memory. However, since - the sentry is implemented atop a host kernel, it does not configure mappings - to physical memory directly. Instead, mappable "memory" in the sentry is - represented by a host file descriptor and offset, since (as noted in - "Background") this is the memory mapping primitive provided by the host - kernel. In general, memory is allocated from a temporary host file using the - `pgalloc` package. Supposing that the sentry allocates offset 0x3000 from - host file "memory-file", the resulting state is: - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 - Sentry filemap: /tmp/foo:0x0 -> host:memory-file:0x3000 - -2. The sentry stores the effective mapping from virtual address to host file in - a host VMA by invoking the `mmap` system call: - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 - Sentry filemap: /tmp/foo:0x0 -> host:memory-file:0x3000 - Host VMA: VA:0x400000 -----------------> host:memory-file:0x3000 - -3. The sentry returns control to the application, which immediately incurs the - page fault again.[^mmap-populate] However, since a host VMA now exists for - the faulting virtual address, the host kernel now handles the page fault as - described in "Background": - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 - Sentry filemap: /tmp/foo:0x0 -> host:memory-file:0x3000 - Host VMA: VA:0x400000 -----------------> host:memory-file:0x3000 - Host filemap: host:memory-file:0x3000 -> PA:0x2fb000 - Host PTE: VA:0x400000 --------------------------------------------> PA:0x2fb000 - -Thus, from an implementation standpoint, host VMAs serve the same purpose in the -sentry that PTEs do in Linux. As in Linux, sentry VMA and filemap state is -independently mutable, and the desired state of host VMAs is derived from that -state. - -[^mmap-populate]: The sentry could force the host kernel to establish PTEs when - it creates the host VMA by passing the `MAP_POPULATE` flag to - the `mmap` system call, but usually does not. This is because, - to reduce the number of page faults that require handling by - the sentry and (correspondingly) the number of host `mmap` - system calls, the sentry usually creates host VMAs that are - much larger than the single faulting page. - -## Private Mappings - -The sentry implements private mappings consistently with Linux. Before -copy-on-write, the private mapping example given in the Background results in: - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 (private) - Sentry filemap: /tmp/foo:0x0 -> host:memory-file:0x3000 - Host VMA: VA:0x400000 -----------------> host:memory-file:0x3000 (read-only) - Host filemap: host:memory-file:0x3000 -> PA:0x2fb000 - Host PTE: VA:0x400000 --------------------------------------------> PA:0x2fb000 (read-only) - -When the application attempts to write to this address, the host kernel delivers -information about the resulting page fault to the sentry. Analogous to Linux, -the sentry allocates memory to store the mapping's private copy of the file's -contents, copies file contents to the allocated memory, and changes the host VMA -to map to the private copy. Supposing that the sentry chooses the offset 0x4000 -in host file `memory-file` to store the private copy, the state of the system -after copy-on-write is: - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 (private) - Sentry filemap: /tmp/foo:0x0 -> host:memory-file:0x3000 - Host VMA: VA:0x400000 -----------------> host:memory-file:0x4000 - Host filemap: host:memory-file:0x4000 -> PA:0x5ea000 - Host PTE: VA:0x400000 --------------------------------------------> PA:0x5ea000 - -However, this highlights an important difference between Linux and the sentry. -In Linux, page tables are concrete (architecture-dependent) data structures -owned by the kernel. Conversely, the sentry has the ability to create and -destroy host VMAs using host system calls, but it does not have direct access to -their state. Thus, as written, if the application invokes the `munmap` system -call to remove the sentry VMA, it is non-trivial for the sentry to determine -that it should deallocate `host:memory-file:0x4000`. This implies that the -sentry must retain information about the host VMAs that it has created. - -## Anonymous Mappings - -The sentry implements anonymous mappings consistently with Linux, except that -there is no shared zero page. - -# Implementation Constructs - -In Linux: - -- A virtual address space is represented by `struct mm_struct`. - -- VMAs are represented by `struct vm_area_struct`, stored in `struct - mm_struct::mmap`. - -- Mappings from file offsets to physical memory are stored in `struct - address_space`. - -- Reverse mappings from file offsets to virtual mappings are stored in `struct - address_space::i_mmap`. - -- Physical memory pages are represented by a pointer to `struct page` or an - index called a *page frame number* (PFN), represented by `pfn_t`. - -- PTEs are represented by architecture-dependent type `pte_t`, stored in a - table hierarchy rooted at `struct mm_struct::pgd`. - -In the sentry: - -- A virtual address space is represented by type [`mm.MemoryManager`][mm]. - -- Sentry VMAs are represented by type [`mm.vma`][mm], stored in - `mm.MemoryManager.vmas`. - -- Mappings from sentry file offsets to host file offsets are abstracted - through interface method [`memmap.Mappable.Translate`][memmap]. - -- Reverse mappings from sentry file offsets to virtual mappings are abstracted - through interface methods - [`memmap.Mappable.AddMapping` and `memmap.Mappable.RemoveMapping`][memmap]. - -- Host files that may be mapped into host VMAs are represented by type - [`platform.File`][platform]. - -- Host VMAs are represented in the sentry by type [`mm.pma`][mm] ("platform - mapping area"), stored in `mm.MemoryManager.pmas`. - -- Creation and destruction of host VMAs is abstracted through interface - methods - [`platform.AddressSpace.MapFile` and `platform.AddressSpace.Unmap`][platform]. - -[memmap]: https://github.com/google/gvisor/blob/master/pkg/sentry/memmap/memmap.go -[mm]: https://github.com/google/gvisor/blob/master/pkg/sentry/mm/mm.go -[pgalloc]: https://github.com/google/gvisor/blob/master/pkg/sentry/pgalloc/pgalloc.go -[platform]: https://github.com/google/gvisor/blob/master/pkg/sentry/platform/platform.go diff --git a/pkg/sentry/mm/aio_mappable_refs.go b/pkg/sentry/mm/aio_mappable_refs.go new file mode 100644 index 000000000..6e1bc6739 --- /dev/null +++ b/pkg/sentry/mm/aio_mappable_refs.go @@ -0,0 +1,140 @@ +package mm + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const aioMappableenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var aioMappableobj *aioMappable + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type aioMappableRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *aioMappableRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *aioMappableRefs) RefType() string { + return fmt.Sprintf("%T", aioMappableobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *aioMappableRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *aioMappableRefs) LogRefs() bool { + return aioMappableenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *aioMappableRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *aioMappableRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if aioMappableenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *aioMappableRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if aioMappableenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *aioMappableRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if aioMappableenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *aioMappableRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/mm/file_refcount_set.go b/pkg/sentry/mm/file_refcount_set.go new file mode 100644 index 000000000..602a137d4 --- /dev/null +++ b/pkg/sentry/mm/file_refcount_set.go @@ -0,0 +1,1647 @@ +package mm + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const fileRefcounttrackGaps = 0 + +var _ = uint8(fileRefcounttrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type fileRefcountdynamicGap [fileRefcounttrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *fileRefcountdynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *fileRefcountdynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + fileRefcountminDegree = 3 + + fileRefcountmaxDegree = 2 * fileRefcountminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type fileRefcountSet struct { + root fileRefcountnode `state:".(*fileRefcountSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *fileRefcountSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *fileRefcountSet) IsEmptyRange(r __generics_imported0.FileRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *fileRefcountSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *fileRefcountSet) SpanRange(r __generics_imported0.FileRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *fileRefcountSet) FirstSegment() fileRefcountIterator { + if s.root.nrSegments == 0 { + return fileRefcountIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *fileRefcountSet) LastSegment() fileRefcountIterator { + if s.root.nrSegments == 0 { + return fileRefcountIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *fileRefcountSet) FirstGap() fileRefcountGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return fileRefcountGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *fileRefcountSet) LastGap() fileRefcountGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return fileRefcountGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *fileRefcountSet) Find(key uint64) (fileRefcountIterator, fileRefcountGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return fileRefcountIterator{n, i}, fileRefcountGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return fileRefcountIterator{}, fileRefcountGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *fileRefcountSet) FindSegment(key uint64) fileRefcountIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *fileRefcountSet) LowerBoundSegment(min uint64) fileRefcountIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *fileRefcountSet) UpperBoundSegment(max uint64) fileRefcountIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *fileRefcountSet) FindGap(key uint64) fileRefcountGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *fileRefcountSet) LowerBoundGap(min uint64) fileRefcountGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *fileRefcountSet) UpperBoundGap(max uint64) fileRefcountGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *fileRefcountSet) Add(r __generics_imported0.FileRange, val int32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *fileRefcountSet) AddWithoutMerging(r __generics_imported0.FileRange, val int32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *fileRefcountSet) Insert(gap fileRefcountGapIterator, r __generics_imported0.FileRange, val int32) fileRefcountIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (fileRefcountSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := fileRefcounttrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (fileRefcountSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (fileRefcountSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := fileRefcounttrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *fileRefcountSet) InsertWithoutMerging(gap fileRefcountGapIterator, r __generics_imported0.FileRange, val int32) fileRefcountIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *fileRefcountSet) InsertWithoutMergingUnchecked(gap fileRefcountGapIterator, r __generics_imported0.FileRange, val int32) fileRefcountIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := fileRefcounttrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return fileRefcountIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *fileRefcountSet) Remove(seg fileRefcountIterator) fileRefcountGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if fileRefcounttrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + fileRefcountSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if fileRefcounttrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(fileRefcountGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *fileRefcountSet) RemoveAll() { + s.root = fileRefcountnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *fileRefcountSet) RemoveRange(r __generics_imported0.FileRange) fileRefcountGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *fileRefcountSet) Merge(first, second fileRefcountIterator) fileRefcountIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *fileRefcountSet) MergeUnchecked(first, second fileRefcountIterator) fileRefcountIterator { + if first.End() == second.Start() { + if mval, ok := (fileRefcountSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return fileRefcountIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *fileRefcountSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *fileRefcountSet) MergeRange(r __generics_imported0.FileRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *fileRefcountSet) MergeAdjacent(r __generics_imported0.FileRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *fileRefcountSet) Split(seg fileRefcountIterator, split uint64) (fileRefcountIterator, fileRefcountIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *fileRefcountSet) SplitUnchecked(seg fileRefcountIterator, split uint64) (fileRefcountIterator, fileRefcountIterator) { + val1, val2 := (fileRefcountSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.FileRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *fileRefcountSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *fileRefcountSet) Isolate(seg fileRefcountIterator, r __generics_imported0.FileRange) fileRefcountIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *fileRefcountSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg fileRefcountIterator)) fileRefcountGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return fileRefcountGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return fileRefcountGapIterator{} + } + } +} + +// +stateify savable +type fileRefcountnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *fileRefcountnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap fileRefcountdynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [fileRefcountmaxDegree - 1]__generics_imported0.FileRange + values [fileRefcountmaxDegree - 1]int32 + children [fileRefcountmaxDegree]*fileRefcountnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *fileRefcountnode) firstSegment() fileRefcountIterator { + for n.hasChildren { + n = n.children[0] + } + return fileRefcountIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *fileRefcountnode) lastSegment() fileRefcountIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return fileRefcountIterator{n, n.nrSegments - 1} +} + +func (n *fileRefcountnode) prevSibling() *fileRefcountnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *fileRefcountnode) nextSibling() *fileRefcountnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *fileRefcountnode) rebalanceBeforeInsert(gap fileRefcountGapIterator) fileRefcountGapIterator { + if n.nrSegments < fileRefcountmaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &fileRefcountnode{ + nrSegments: fileRefcountminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &fileRefcountnode{ + nrSegments: fileRefcountminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:fileRefcountminDegree-1], n.keys[:fileRefcountminDegree-1]) + copy(left.values[:fileRefcountminDegree-1], n.values[:fileRefcountminDegree-1]) + copy(right.keys[:fileRefcountminDegree-1], n.keys[fileRefcountminDegree:]) + copy(right.values[:fileRefcountminDegree-1], n.values[fileRefcountminDegree:]) + n.keys[0], n.values[0] = n.keys[fileRefcountminDegree-1], n.values[fileRefcountminDegree-1] + fileRefcountzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:fileRefcountminDegree], n.children[:fileRefcountminDegree]) + copy(right.children[:fileRefcountminDegree], n.children[fileRefcountminDegree:]) + fileRefcountzeroNodeSlice(n.children[2:]) + for i := 0; i < fileRefcountminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if fileRefcounttrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < fileRefcountminDegree { + return fileRefcountGapIterator{left, gap.index} + } + return fileRefcountGapIterator{right, gap.index - fileRefcountminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[fileRefcountminDegree-1], n.values[fileRefcountminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &fileRefcountnode{ + nrSegments: fileRefcountminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:fileRefcountminDegree-1], n.keys[fileRefcountminDegree:]) + copy(sibling.values[:fileRefcountminDegree-1], n.values[fileRefcountminDegree:]) + fileRefcountzeroValueSlice(n.values[fileRefcountminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:fileRefcountminDegree], n.children[fileRefcountminDegree:]) + fileRefcountzeroNodeSlice(n.children[fileRefcountminDegree:]) + for i := 0; i < fileRefcountminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = fileRefcountminDegree - 1 + + if fileRefcounttrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < fileRefcountminDegree { + return gap + } + return fileRefcountGapIterator{sibling, gap.index - fileRefcountminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *fileRefcountnode) rebalanceAfterRemove(gap fileRefcountGapIterator) fileRefcountGapIterator { + for { + if n.nrSegments >= fileRefcountminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= fileRefcountminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + fileRefcountSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if fileRefcounttrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return fileRefcountGapIterator{n, 0} + } + if gap.node == n { + return fileRefcountGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= fileRefcountminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + fileRefcountSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if fileRefcounttrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return fileRefcountGapIterator{n, n.nrSegments} + } + return fileRefcountGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return fileRefcountGapIterator{p, gap.index} + } + if gap.node == right { + return fileRefcountGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *fileRefcountnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = fileRefcountGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + fileRefcountSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if fileRefcounttrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *fileRefcountnode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *fileRefcountnode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *fileRefcountnode) calculateMaxGapLeaf() uint64 { + max := fileRefcountGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (fileRefcountGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *fileRefcountnode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *fileRefcountnode) searchFirstLargeEnoughGap(minSize uint64) fileRefcountGapIterator { + if n.maxGap.Get() < minSize { + return fileRefcountGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := fileRefcountGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *fileRefcountnode) searchLastLargeEnoughGap(minSize uint64) fileRefcountGapIterator { + if n.maxGap.Get() < minSize { + return fileRefcountGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := fileRefcountGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type fileRefcountIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *fileRefcountnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg fileRefcountIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg fileRefcountIterator) Range() __generics_imported0.FileRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg fileRefcountIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg fileRefcountIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg fileRefcountIterator) SetRangeUnchecked(r __generics_imported0.FileRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg fileRefcountIterator) SetRange(r __generics_imported0.FileRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg fileRefcountIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg fileRefcountIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg fileRefcountIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg fileRefcountIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg fileRefcountIterator) Value() int32 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg fileRefcountIterator) ValuePtr() *int32 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg fileRefcountIterator) SetValue(val int32) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg fileRefcountIterator) PrevSegment() fileRefcountIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return fileRefcountIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return fileRefcountIterator{} + } + return fileRefcountsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg fileRefcountIterator) NextSegment() fileRefcountIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return fileRefcountIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return fileRefcountIterator{} + } + return fileRefcountsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg fileRefcountIterator) PrevGap() fileRefcountGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return fileRefcountGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg fileRefcountIterator) NextGap() fileRefcountGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return fileRefcountGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg fileRefcountIterator) PrevNonEmpty() (fileRefcountIterator, fileRefcountGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return fileRefcountIterator{}, gap + } + return gap.PrevSegment(), fileRefcountGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg fileRefcountIterator) NextNonEmpty() (fileRefcountIterator, fileRefcountGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return fileRefcountIterator{}, gap + } + return gap.NextSegment(), fileRefcountGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type fileRefcountGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *fileRefcountnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap fileRefcountGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap fileRefcountGapIterator) Range() __generics_imported0.FileRange { + return __generics_imported0.FileRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap fileRefcountGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return fileRefcountSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap fileRefcountGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return fileRefcountSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap fileRefcountGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap fileRefcountGapIterator) PrevSegment() fileRefcountIterator { + return fileRefcountsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap fileRefcountGapIterator) NextSegment() fileRefcountIterator { + return fileRefcountsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap fileRefcountGapIterator) PrevGap() fileRefcountGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return fileRefcountGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap fileRefcountGapIterator) NextGap() fileRefcountGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return fileRefcountGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap fileRefcountGapIterator) NextLargeEnoughGap(minSize uint64) fileRefcountGapIterator { + if fileRefcounttrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap fileRefcountGapIterator) nextLargeEnoughGapHelper(minSize uint64) fileRefcountGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return fileRefcountGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap fileRefcountGapIterator) PrevLargeEnoughGap(minSize uint64) fileRefcountGapIterator { + if fileRefcounttrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap fileRefcountGapIterator) prevLargeEnoughGapHelper(minSize uint64) fileRefcountGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return fileRefcountGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func fileRefcountsegmentBeforePosition(n *fileRefcountnode, i int) fileRefcountIterator { + for i == 0 { + if n.parent == nil { + return fileRefcountIterator{} + } + n, i = n.parent, n.parentIndex + } + return fileRefcountIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func fileRefcountsegmentAfterPosition(n *fileRefcountnode, i int) fileRefcountIterator { + for i == n.nrSegments { + if n.parent == nil { + return fileRefcountIterator{} + } + n, i = n.parent, n.parentIndex + } + return fileRefcountIterator{n, i} +} + +func fileRefcountzeroValueSlice(slice []int32) { + + for i := range slice { + fileRefcountSetFunctions{}.ClearValue(&slice[i]) + } +} + +func fileRefcountzeroNodeSlice(slice []*fileRefcountnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *fileRefcountSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *fileRefcountnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *fileRefcountnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if fileRefcounttrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type fileRefcountSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []int32 +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *fileRefcountSet) ExportSortedSlices() *fileRefcountSegmentDataSlices { + var sds fileRefcountSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *fileRefcountSet) ImportSortedSlices(sds *fileRefcountSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.FileRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *fileRefcountSet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.FileRange, int32) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *fileRefcountSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *fileRefcountSet) saveRoot() *fileRefcountSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *fileRefcountSet) loadRoot(sds *fileRefcountSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/mm/io_list.go b/pkg/sentry/mm/io_list.go new file mode 100644 index 000000000..9a54e60be --- /dev/null +++ b/pkg/sentry/mm/io_list.go @@ -0,0 +1,221 @@ +package mm + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type ioElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (ioElementMapper) linkerFor(elem *ioResult) *ioResult { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type ioList struct { + head *ioResult + tail *ioResult +} + +// Reset resets list l to the empty state. +func (l *ioList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *ioList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *ioList) Front() *ioResult { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *ioList) Back() *ioResult { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *ioList) Len() (count int) { + for e := l.Front(); e != nil; e = (ioElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *ioList) PushFront(e *ioResult) { + linker := ioElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + ioElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *ioList) PushBack(e *ioResult) { + linker := ioElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + ioElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *ioList) PushBackList(m *ioList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + ioElementMapper{}.linkerFor(l.tail).SetNext(m.head) + ioElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *ioList) InsertAfter(b, e *ioResult) { + bLinker := ioElementMapper{}.linkerFor(b) + eLinker := ioElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + ioElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *ioList) InsertBefore(a, e *ioResult) { + aLinker := ioElementMapper{}.linkerFor(a) + eLinker := ioElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + ioElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *ioList) Remove(e *ioResult) { + linker := ioElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + ioElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + ioElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type ioEntry struct { + next *ioResult + prev *ioResult +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *ioEntry) Next() *ioResult { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *ioEntry) Prev() *ioResult { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *ioEntry) SetNext(elem *ioResult) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *ioEntry) SetPrev(elem *ioResult) { + e.prev = elem +} diff --git a/pkg/sentry/mm/mm_state_autogen.go b/pkg/sentry/mm/mm_state_autogen.go new file mode 100644 index 000000000..c4cdf8bd2 --- /dev/null +++ b/pkg/sentry/mm/mm_state_autogen.go @@ -0,0 +1,808 @@ +// automatically generated by stateify. + +package mm + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (a *aioManager) StateTypeName() string { + return "pkg/sentry/mm.aioManager" +} + +func (a *aioManager) StateFields() []string { + return []string{ + "contexts", + } +} + +func (a *aioManager) beforeSave() {} + +// +checklocksignore +func (a *aioManager) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.contexts) +} + +func (a *aioManager) afterLoad() {} + +// +checklocksignore +func (a *aioManager) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.contexts) +} + +func (i *ioResult) StateTypeName() string { + return "pkg/sentry/mm.ioResult" +} + +func (i *ioResult) StateFields() []string { + return []string{ + "data", + "ioEntry", + } +} + +func (i *ioResult) beforeSave() {} + +// +checklocksignore +func (i *ioResult) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.data) + stateSinkObject.Save(1, &i.ioEntry) +} + +func (i *ioResult) afterLoad() {} + +// +checklocksignore +func (i *ioResult) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.data) + stateSourceObject.Load(1, &i.ioEntry) +} + +func (ctx *AIOContext) StateTypeName() string { + return "pkg/sentry/mm.AIOContext" +} + +func (ctx *AIOContext) StateFields() []string { + return []string{ + "results", + "maxOutstanding", + "outstanding", + } +} + +func (ctx *AIOContext) beforeSave() {} + +// +checklocksignore +func (ctx *AIOContext) StateSave(stateSinkObject state.Sink) { + ctx.beforeSave() + if !state.IsZeroValue(&ctx.dead) { + state.Failf("dead is %#v, expected zero", &ctx.dead) + } + stateSinkObject.Save(0, &ctx.results) + stateSinkObject.Save(1, &ctx.maxOutstanding) + stateSinkObject.Save(2, &ctx.outstanding) +} + +// +checklocksignore +func (ctx *AIOContext) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ctx.results) + stateSourceObject.Load(1, &ctx.maxOutstanding) + stateSourceObject.Load(2, &ctx.outstanding) + stateSourceObject.AfterLoad(ctx.afterLoad) +} + +func (m *aioMappable) StateTypeName() string { + return "pkg/sentry/mm.aioMappable" +} + +func (m *aioMappable) StateFields() []string { + return []string{ + "aioMappableRefs", + "mfp", + "fr", + } +} + +func (m *aioMappable) beforeSave() {} + +// +checklocksignore +func (m *aioMappable) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.aioMappableRefs) + stateSinkObject.Save(1, &m.mfp) + stateSinkObject.Save(2, &m.fr) +} + +func (m *aioMappable) afterLoad() {} + +// +checklocksignore +func (m *aioMappable) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.aioMappableRefs) + stateSourceObject.Load(1, &m.mfp) + stateSourceObject.Load(2, &m.fr) +} + +func (r *aioMappableRefs) StateTypeName() string { + return "pkg/sentry/mm.aioMappableRefs" +} + +func (r *aioMappableRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *aioMappableRefs) beforeSave() {} + +// +checklocksignore +func (r *aioMappableRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *aioMappableRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (s *fileRefcountSet) StateTypeName() string { + return "pkg/sentry/mm.fileRefcountSet" +} + +func (s *fileRefcountSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *fileRefcountSet) beforeSave() {} + +// +checklocksignore +func (s *fileRefcountSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *fileRefcountSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *fileRefcountSet) afterLoad() {} + +// +checklocksignore +func (s *fileRefcountSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*fileRefcountSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*fileRefcountSegmentDataSlices)) }) +} + +func (n *fileRefcountnode) StateTypeName() string { + return "pkg/sentry/mm.fileRefcountnode" +} + +func (n *fileRefcountnode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *fileRefcountnode) beforeSave() {} + +// +checklocksignore +func (n *fileRefcountnode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *fileRefcountnode) afterLoad() {} + +// +checklocksignore +func (n *fileRefcountnode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (f *fileRefcountSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/mm.fileRefcountSegmentDataSlices" +} + +func (f *fileRefcountSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (f *fileRefcountSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (f *fileRefcountSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.Start) + stateSinkObject.Save(1, &f.End) + stateSinkObject.Save(2, &f.Values) +} + +func (f *fileRefcountSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (f *fileRefcountSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.Start) + stateSourceObject.Load(1, &f.End) + stateSourceObject.Load(2, &f.Values) +} + +func (l *ioList) StateTypeName() string { + return "pkg/sentry/mm.ioList" +} + +func (l *ioList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *ioList) beforeSave() {} + +// +checklocksignore +func (l *ioList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *ioList) afterLoad() {} + +// +checklocksignore +func (l *ioList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *ioEntry) StateTypeName() string { + return "pkg/sentry/mm.ioEntry" +} + +func (e *ioEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *ioEntry) beforeSave() {} + +// +checklocksignore +func (e *ioEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *ioEntry) afterLoad() {} + +// +checklocksignore +func (e *ioEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (mm *MemoryManager) StateTypeName() string { + return "pkg/sentry/mm.MemoryManager" +} + +func (mm *MemoryManager) StateFields() []string { + return []string{ + "p", + "mfp", + "layout", + "privateRefs", + "users", + "vmas", + "brk", + "usageAS", + "lockedAS", + "dataAS", + "defMLockMode", + "pmas", + "curRSS", + "maxRSS", + "argv", + "envv", + "auxv", + "executable", + "dumpability", + "aioManager", + "sleepForActivation", + "vdsoSigReturnAddr", + "membarrierPrivateEnabled", + "membarrierRSeqEnabled", + } +} + +// +checklocksignore +func (mm *MemoryManager) StateSave(stateSinkObject state.Sink) { + mm.beforeSave() + if !state.IsZeroValue(&mm.active) { + state.Failf("active is %#v, expected zero", &mm.active) + } + if !state.IsZeroValue(&mm.captureInvalidations) { + state.Failf("captureInvalidations is %#v, expected zero", &mm.captureInvalidations) + } + stateSinkObject.Save(0, &mm.p) + stateSinkObject.Save(1, &mm.mfp) + stateSinkObject.Save(2, &mm.layout) + stateSinkObject.Save(3, &mm.privateRefs) + stateSinkObject.Save(4, &mm.users) + stateSinkObject.Save(5, &mm.vmas) + stateSinkObject.Save(6, &mm.brk) + stateSinkObject.Save(7, &mm.usageAS) + stateSinkObject.Save(8, &mm.lockedAS) + stateSinkObject.Save(9, &mm.dataAS) + stateSinkObject.Save(10, &mm.defMLockMode) + stateSinkObject.Save(11, &mm.pmas) + stateSinkObject.Save(12, &mm.curRSS) + stateSinkObject.Save(13, &mm.maxRSS) + stateSinkObject.Save(14, &mm.argv) + stateSinkObject.Save(15, &mm.envv) + stateSinkObject.Save(16, &mm.auxv) + stateSinkObject.Save(17, &mm.executable) + stateSinkObject.Save(18, &mm.dumpability) + stateSinkObject.Save(19, &mm.aioManager) + stateSinkObject.Save(20, &mm.sleepForActivation) + stateSinkObject.Save(21, &mm.vdsoSigReturnAddr) + stateSinkObject.Save(22, &mm.membarrierPrivateEnabled) + stateSinkObject.Save(23, &mm.membarrierRSeqEnabled) +} + +// +checklocksignore +func (mm *MemoryManager) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mm.p) + stateSourceObject.Load(1, &mm.mfp) + stateSourceObject.Load(2, &mm.layout) + stateSourceObject.Load(3, &mm.privateRefs) + stateSourceObject.Load(4, &mm.users) + stateSourceObject.Load(5, &mm.vmas) + stateSourceObject.Load(6, &mm.brk) + stateSourceObject.Load(7, &mm.usageAS) + stateSourceObject.Load(8, &mm.lockedAS) + stateSourceObject.Load(9, &mm.dataAS) + stateSourceObject.Load(10, &mm.defMLockMode) + stateSourceObject.Load(11, &mm.pmas) + stateSourceObject.Load(12, &mm.curRSS) + stateSourceObject.Load(13, &mm.maxRSS) + stateSourceObject.Load(14, &mm.argv) + stateSourceObject.Load(15, &mm.envv) + stateSourceObject.Load(16, &mm.auxv) + stateSourceObject.Load(17, &mm.executable) + stateSourceObject.Load(18, &mm.dumpability) + stateSourceObject.Load(19, &mm.aioManager) + stateSourceObject.Load(20, &mm.sleepForActivation) + stateSourceObject.Load(21, &mm.vdsoSigReturnAddr) + stateSourceObject.Load(22, &mm.membarrierPrivateEnabled) + stateSourceObject.Load(23, &mm.membarrierRSeqEnabled) + stateSourceObject.AfterLoad(mm.afterLoad) +} + +func (vma *vma) StateTypeName() string { + return "pkg/sentry/mm.vma" +} + +func (vma *vma) StateFields() []string { + return []string{ + "mappable", + "off", + "realPerms", + "dontfork", + "mlockMode", + "numaPolicy", + "numaNodemask", + "id", + "hint", + } +} + +func (vma *vma) beforeSave() {} + +// +checklocksignore +func (vma *vma) StateSave(stateSinkObject state.Sink) { + vma.beforeSave() + var realPermsValue int = vma.saveRealPerms() + stateSinkObject.SaveValue(2, realPermsValue) + stateSinkObject.Save(0, &vma.mappable) + stateSinkObject.Save(1, &vma.off) + stateSinkObject.Save(3, &vma.dontfork) + stateSinkObject.Save(4, &vma.mlockMode) + stateSinkObject.Save(5, &vma.numaPolicy) + stateSinkObject.Save(6, &vma.numaNodemask) + stateSinkObject.Save(7, &vma.id) + stateSinkObject.Save(8, &vma.hint) +} + +func (vma *vma) afterLoad() {} + +// +checklocksignore +func (vma *vma) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &vma.mappable) + stateSourceObject.Load(1, &vma.off) + stateSourceObject.Load(3, &vma.dontfork) + stateSourceObject.Load(4, &vma.mlockMode) + stateSourceObject.Load(5, &vma.numaPolicy) + stateSourceObject.Load(6, &vma.numaNodemask) + stateSourceObject.Load(7, &vma.id) + stateSourceObject.Load(8, &vma.hint) + stateSourceObject.LoadValue(2, new(int), func(y interface{}) { vma.loadRealPerms(y.(int)) }) +} + +func (p *pma) StateTypeName() string { + return "pkg/sentry/mm.pma" +} + +func (p *pma) StateFields() []string { + return []string{ + "off", + "translatePerms", + "effectivePerms", + "maxPerms", + "needCOW", + "private", + } +} + +func (p *pma) beforeSave() {} + +// +checklocksignore +func (p *pma) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.off) + stateSinkObject.Save(1, &p.translatePerms) + stateSinkObject.Save(2, &p.effectivePerms) + stateSinkObject.Save(3, &p.maxPerms) + stateSinkObject.Save(4, &p.needCOW) + stateSinkObject.Save(5, &p.private) +} + +func (p *pma) afterLoad() {} + +// +checklocksignore +func (p *pma) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.off) + stateSourceObject.Load(1, &p.translatePerms) + stateSourceObject.Load(2, &p.effectivePerms) + stateSourceObject.Load(3, &p.maxPerms) + stateSourceObject.Load(4, &p.needCOW) + stateSourceObject.Load(5, &p.private) +} + +func (p *privateRefs) StateTypeName() string { + return "pkg/sentry/mm.privateRefs" +} + +func (p *privateRefs) StateFields() []string { + return []string{ + "refs", + } +} + +func (p *privateRefs) beforeSave() {} + +// +checklocksignore +func (p *privateRefs) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.refs) +} + +func (p *privateRefs) afterLoad() {} + +// +checklocksignore +func (p *privateRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.refs) +} + +func (s *pmaSet) StateTypeName() string { + return "pkg/sentry/mm.pmaSet" +} + +func (s *pmaSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *pmaSet) beforeSave() {} + +// +checklocksignore +func (s *pmaSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *pmaSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *pmaSet) afterLoad() {} + +// +checklocksignore +func (s *pmaSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*pmaSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*pmaSegmentDataSlices)) }) +} + +func (n *pmanode) StateTypeName() string { + return "pkg/sentry/mm.pmanode" +} + +func (n *pmanode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *pmanode) beforeSave() {} + +// +checklocksignore +func (n *pmanode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *pmanode) afterLoad() {} + +// +checklocksignore +func (n *pmanode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (p *pmaSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/mm.pmaSegmentDataSlices" +} + +func (p *pmaSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (p *pmaSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (p *pmaSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.Start) + stateSinkObject.Save(1, &p.End) + stateSinkObject.Save(2, &p.Values) +} + +func (p *pmaSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (p *pmaSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.Start) + stateSourceObject.Load(1, &p.End) + stateSourceObject.Load(2, &p.Values) +} + +func (m *SpecialMappable) StateTypeName() string { + return "pkg/sentry/mm.SpecialMappable" +} + +func (m *SpecialMappable) StateFields() []string { + return []string{ + "SpecialMappableRefs", + "mfp", + "fr", + "name", + } +} + +func (m *SpecialMappable) beforeSave() {} + +// +checklocksignore +func (m *SpecialMappable) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.SpecialMappableRefs) + stateSinkObject.Save(1, &m.mfp) + stateSinkObject.Save(2, &m.fr) + stateSinkObject.Save(3, &m.name) +} + +func (m *SpecialMappable) afterLoad() {} + +// +checklocksignore +func (m *SpecialMappable) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.SpecialMappableRefs) + stateSourceObject.Load(1, &m.mfp) + stateSourceObject.Load(2, &m.fr) + stateSourceObject.Load(3, &m.name) +} + +func (r *SpecialMappableRefs) StateTypeName() string { + return "pkg/sentry/mm.SpecialMappableRefs" +} + +func (r *SpecialMappableRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *SpecialMappableRefs) beforeSave() {} + +// +checklocksignore +func (r *SpecialMappableRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *SpecialMappableRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (s *vmaSet) StateTypeName() string { + return "pkg/sentry/mm.vmaSet" +} + +func (s *vmaSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *vmaSet) beforeSave() {} + +// +checklocksignore +func (s *vmaSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *vmaSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *vmaSet) afterLoad() {} + +// +checklocksignore +func (s *vmaSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*vmaSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*vmaSegmentDataSlices)) }) +} + +func (n *vmanode) StateTypeName() string { + return "pkg/sentry/mm.vmanode" +} + +func (n *vmanode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *vmanode) beforeSave() {} + +// +checklocksignore +func (n *vmanode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *vmanode) afterLoad() {} + +// +checklocksignore +func (n *vmanode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (v *vmaSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/mm.vmaSegmentDataSlices" +} + +func (v *vmaSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (v *vmaSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (v *vmaSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + v.beforeSave() + stateSinkObject.Save(0, &v.Start) + stateSinkObject.Save(1, &v.End) + stateSinkObject.Save(2, &v.Values) +} + +func (v *vmaSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (v *vmaSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &v.Start) + stateSourceObject.Load(1, &v.End) + stateSourceObject.Load(2, &v.Values) +} + +func init() { + state.Register((*aioManager)(nil)) + state.Register((*ioResult)(nil)) + state.Register((*AIOContext)(nil)) + state.Register((*aioMappable)(nil)) + state.Register((*aioMappableRefs)(nil)) + state.Register((*fileRefcountSet)(nil)) + state.Register((*fileRefcountnode)(nil)) + state.Register((*fileRefcountSegmentDataSlices)(nil)) + state.Register((*ioList)(nil)) + state.Register((*ioEntry)(nil)) + state.Register((*MemoryManager)(nil)) + state.Register((*vma)(nil)) + state.Register((*pma)(nil)) + state.Register((*privateRefs)(nil)) + state.Register((*pmaSet)(nil)) + state.Register((*pmanode)(nil)) + state.Register((*pmaSegmentDataSlices)(nil)) + state.Register((*SpecialMappable)(nil)) + state.Register((*SpecialMappableRefs)(nil)) + state.Register((*vmaSet)(nil)) + state.Register((*vmanode)(nil)) + state.Register((*vmaSegmentDataSlices)(nil)) +} diff --git a/pkg/sentry/mm/mm_test.go b/pkg/sentry/mm/mm_test.go deleted file mode 100644 index 84cb8158d..000000000 --- a/pkg/sentry/mm/mm_test.go +++ /dev/null @@ -1,275 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mm - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/usermem" -) - -func testMemoryManager(ctx context.Context) *MemoryManager { - p := platform.FromContext(ctx) - mfp := pgalloc.MemoryFileProviderFromContext(ctx) - mm := NewMemoryManager(p, mfp, false) - mm.layout = arch.MmapLayout{ - MinAddr: p.MinUserAddress(), - MaxAddr: p.MaxUserAddress(), - BottomUpBase: p.MinUserAddress(), - TopDownBase: p.MaxUserAddress(), - } - return mm -} - -func (mm *MemoryManager) realUsageAS() uint64 { - return uint64(mm.vmas.Span()) -} - -func TestUsageASUpdates(t *testing.T) { - ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: 2 * hostarch.PageSize, - Private: true, - }) - if err != nil { - t.Fatalf("MMap got err %v want nil", err) - } - realUsage := mm.realUsageAS() - if mm.usageAS != realUsage { - t.Fatalf("usageAS believes %v bytes are mapped; %v bytes are actually mapped", mm.usageAS, realUsage) - } - - mm.MUnmap(ctx, addr, hostarch.PageSize) - realUsage = mm.realUsageAS() - if mm.usageAS != realUsage { - t.Fatalf("usageAS believes %v bytes are mapped; %v bytes are actually mapped", mm.usageAS, realUsage) - } -} - -func (mm *MemoryManager) realDataAS() uint64 { - var sz uint64 - for seg := mm.vmas.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - vma := seg.Value() - if vma.isPrivateDataLocked() { - sz += uint64(seg.Range().Length()) - } - } - return sz -} - -func TestDataASUpdates(t *testing.T) { - ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: 3 * hostarch.PageSize, - Private: true, - Perms: hostarch.Write, - MaxPerms: hostarch.AnyAccess, - }) - if err != nil { - t.Fatalf("MMap got err %v want nil", err) - } - if mm.dataAS == 0 { - t.Fatalf("dataAS is 0, wanted not 0") - } - realDataAS := mm.realDataAS() - if mm.dataAS != realDataAS { - t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) - } - - mm.MUnmap(ctx, addr, hostarch.PageSize) - realDataAS = mm.realDataAS() - if mm.dataAS != realDataAS { - t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) - } - - mm.MProtect(addr+hostarch.PageSize, hostarch.PageSize, hostarch.Read, false) - realDataAS = mm.realDataAS() - if mm.dataAS != realDataAS { - t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) - } - - mm.MRemap(ctx, addr+2*hostarch.PageSize, hostarch.PageSize, 2*hostarch.PageSize, MRemapOpts{ - Move: MRemapMayMove, - }) - realDataAS = mm.realDataAS() - if mm.dataAS != realDataAS { - t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) - } -} - -func TestBrkDataLimitUpdates(t *testing.T) { - limitSet := limits.NewLimitSet() - limitSet.Set(limits.Data, limits.Limit{}, true /* privileged */) // zero RLIMIT_DATA - - ctx := contexttest.WithLimitSet(contexttest.Context(t), limitSet) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - // Try to extend the brk by one page and expect doing so to fail. - oldBrk, _ := mm.Brk(ctx, 0) - if newBrk, _ := mm.Brk(ctx, oldBrk+hostarch.PageSize); newBrk != oldBrk { - t.Errorf("brk() increased data segment above RLIMIT_DATA (old brk = %#x, new brk = %#x", oldBrk, newBrk) - } -} - -// TestIOAfterUnmap ensures that IO fails after unmap. -func TestIOAfterUnmap(t *testing.T) { - ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: hostarch.PageSize, - Private: true, - Perms: hostarch.Read, - MaxPerms: hostarch.AnyAccess, - }) - if err != nil { - t.Fatalf("MMap got err %v want nil", err) - } - - // IO works before munmap. - b := make([]byte, 1) - n, err := mm.CopyIn(ctx, addr, b, usermem.IOOpts{}) - if err != nil { - t.Errorf("CopyIn got err %v want nil", err) - } - if n != 1 { - t.Errorf("CopyIn got %d want 1", n) - } - - err = mm.MUnmap(ctx, addr, hostarch.PageSize) - if err != nil { - t.Fatalf("MUnmap got err %v want nil", err) - } - - n, err = mm.CopyIn(ctx, addr, b, usermem.IOOpts{}) - if !linuxerr.Equals(linuxerr.EFAULT, err) { - t.Errorf("CopyIn got err %v want EFAULT", err) - } - if n != 0 { - t.Errorf("CopyIn got %d want 0", n) - } -} - -// TestIOAfterMProtect tests IO interaction with mprotect permissions. -func TestIOAfterMProtect(t *testing.T) { - ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: hostarch.PageSize, - Private: true, - Perms: hostarch.ReadWrite, - MaxPerms: hostarch.AnyAccess, - }) - if err != nil { - t.Fatalf("MMap got err %v want nil", err) - } - - // Writing works before mprotect. - b := make([]byte, 1) - n, err := mm.CopyOut(ctx, addr, b, usermem.IOOpts{}) - if err != nil { - t.Errorf("CopyOut got err %v want nil", err) - } - if n != 1 { - t.Errorf("CopyOut got %d want 1", n) - } - - err = mm.MProtect(addr, hostarch.PageSize, hostarch.Read, false) - if err != nil { - t.Errorf("MProtect got err %v want nil", err) - } - - // Without IgnorePermissions, CopyOut should no longer succeed. - n, err = mm.CopyOut(ctx, addr, b, usermem.IOOpts{}) - if !linuxerr.Equals(linuxerr.EFAULT, err) { - t.Errorf("CopyOut got err %v want EFAULT", err) - } - if n != 0 { - t.Errorf("CopyOut got %d want 0", n) - } - - // With IgnorePermissions, CopyOut should succeed despite mprotect. - n, err = mm.CopyOut(ctx, addr, b, usermem.IOOpts{ - IgnorePermissions: true, - }) - if err != nil { - t.Errorf("CopyOut got err %v want nil", err) - } - if n != 1 { - t.Errorf("CopyOut got %d want 1", n) - } -} - -// TestAIOPrepareAfterDestroy tests that AIOContext should not be able to be -// prepared after destruction. -func TestAIOPrepareAfterDestroy(t *testing.T) { - ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - id, err := mm.NewAIOContext(ctx, 1) - if err != nil { - t.Fatalf("mm.NewAIOContext got err %v want nil", err) - } - aioCtx, ok := mm.LookupAIOContext(ctx, id) - if !ok { - t.Fatalf("AIOContext not found") - } - mm.DestroyAIOContext(ctx, id) - - // Prepare should fail because aioCtx should be destroyed. - if err := aioCtx.Prepare(); !linuxerr.Equals(linuxerr.EINVAL, err) { - t.Errorf("aioCtx.Prepare got err %v want nil", err) - } else if err == nil { - aioCtx.CancelPendingRequest() - } -} - -// TestAIOLookupAfterDestroy tests that AIOContext should not be able to be -// looked up after memory manager is destroyed. -func TestAIOLookupAfterDestroy(t *testing.T) { - ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) - - id, err := mm.NewAIOContext(ctx, 1) - if err != nil { - mm.DecUsers(ctx) - t.Fatalf("mm.NewAIOContext got err %v want nil", err) - } - mm.DecUsers(ctx) // This destroys the AIOContext manager. - - if _, ok := mm.LookupAIOContext(ctx, id); ok { - t.Errorf("AIOContext found even after AIOContext manager is destroyed") - } -} diff --git a/pkg/sentry/mm/pma_set.go b/pkg/sentry/mm/pma_set.go new file mode 100644 index 000000000..14a7c83b9 --- /dev/null +++ b/pkg/sentry/mm/pma_set.go @@ -0,0 +1,1647 @@ +package mm + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/hostarch" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const pmatrackGaps = 0 + +var _ = uint8(pmatrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type pmadynamicGap [pmatrackGaps]__generics_imported0.Addr + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *pmadynamicGap) Get() __generics_imported0.Addr { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *pmadynamicGap) Set(v __generics_imported0.Addr) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + pmaminDegree = 8 + + pmamaxDegree = 2 * pmaminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type pmaSet struct { + root pmanode `state:".(*pmaSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *pmaSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *pmaSet) IsEmptyRange(r __generics_imported0.AddrRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *pmaSet) Span() __generics_imported0.Addr { + var sz __generics_imported0.Addr + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *pmaSet) SpanRange(r __generics_imported0.AddrRange) __generics_imported0.Addr { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz __generics_imported0.Addr + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *pmaSet) FirstSegment() pmaIterator { + if s.root.nrSegments == 0 { + return pmaIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *pmaSet) LastSegment() pmaIterator { + if s.root.nrSegments == 0 { + return pmaIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *pmaSet) FirstGap() pmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return pmaGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *pmaSet) LastGap() pmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return pmaGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *pmaSet) Find(key __generics_imported0.Addr) (pmaIterator, pmaGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return pmaIterator{n, i}, pmaGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return pmaIterator{}, pmaGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *pmaSet) FindSegment(key __generics_imported0.Addr) pmaIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *pmaSet) LowerBoundSegment(min __generics_imported0.Addr) pmaIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *pmaSet) UpperBoundSegment(max __generics_imported0.Addr) pmaIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *pmaSet) FindGap(key __generics_imported0.Addr) pmaGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *pmaSet) LowerBoundGap(min __generics_imported0.Addr) pmaGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *pmaSet) UpperBoundGap(max __generics_imported0.Addr) pmaGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *pmaSet) Add(r __generics_imported0.AddrRange, val pma) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *pmaSet) AddWithoutMerging(r __generics_imported0.AddrRange, val pma) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *pmaSet) Insert(gap pmaGapIterator, r __generics_imported0.AddrRange, val pma) pmaIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (pmaSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := pmatrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (pmaSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (pmaSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := pmatrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *pmaSet) InsertWithoutMerging(gap pmaGapIterator, r __generics_imported0.AddrRange, val pma) pmaIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *pmaSet) InsertWithoutMergingUnchecked(gap pmaGapIterator, r __generics_imported0.AddrRange, val pma) pmaIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := pmatrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return pmaIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *pmaSet) Remove(seg pmaIterator) pmaGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if pmatrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + pmaSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if pmatrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(pmaGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *pmaSet) RemoveAll() { + s.root = pmanode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *pmaSet) RemoveRange(r __generics_imported0.AddrRange) pmaGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *pmaSet) Merge(first, second pmaIterator) pmaIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *pmaSet) MergeUnchecked(first, second pmaIterator) pmaIterator { + if first.End() == second.Start() { + if mval, ok := (pmaSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return pmaIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *pmaSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *pmaSet) MergeRange(r __generics_imported0.AddrRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *pmaSet) MergeAdjacent(r __generics_imported0.AddrRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *pmaSet) Split(seg pmaIterator, split __generics_imported0.Addr) (pmaIterator, pmaIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *pmaSet) SplitUnchecked(seg pmaIterator, split __generics_imported0.Addr) (pmaIterator, pmaIterator) { + val1, val2 := (pmaSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.AddrRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *pmaSet) SplitAt(split __generics_imported0.Addr) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *pmaSet) Isolate(seg pmaIterator, r __generics_imported0.AddrRange) pmaIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *pmaSet) ApplyContiguous(r __generics_imported0.AddrRange, fn func(seg pmaIterator)) pmaGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return pmaGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return pmaGapIterator{} + } + } +} + +// +stateify savable +type pmanode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *pmanode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap pmadynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [pmamaxDegree - 1]__generics_imported0.AddrRange + values [pmamaxDegree - 1]pma + children [pmamaxDegree]*pmanode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *pmanode) firstSegment() pmaIterator { + for n.hasChildren { + n = n.children[0] + } + return pmaIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *pmanode) lastSegment() pmaIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return pmaIterator{n, n.nrSegments - 1} +} + +func (n *pmanode) prevSibling() *pmanode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *pmanode) nextSibling() *pmanode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *pmanode) rebalanceBeforeInsert(gap pmaGapIterator) pmaGapIterator { + if n.nrSegments < pmamaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &pmanode{ + nrSegments: pmaminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &pmanode{ + nrSegments: pmaminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:pmaminDegree-1], n.keys[:pmaminDegree-1]) + copy(left.values[:pmaminDegree-1], n.values[:pmaminDegree-1]) + copy(right.keys[:pmaminDegree-1], n.keys[pmaminDegree:]) + copy(right.values[:pmaminDegree-1], n.values[pmaminDegree:]) + n.keys[0], n.values[0] = n.keys[pmaminDegree-1], n.values[pmaminDegree-1] + pmazeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:pmaminDegree], n.children[:pmaminDegree]) + copy(right.children[:pmaminDegree], n.children[pmaminDegree:]) + pmazeroNodeSlice(n.children[2:]) + for i := 0; i < pmaminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if pmatrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < pmaminDegree { + return pmaGapIterator{left, gap.index} + } + return pmaGapIterator{right, gap.index - pmaminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[pmaminDegree-1], n.values[pmaminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &pmanode{ + nrSegments: pmaminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:pmaminDegree-1], n.keys[pmaminDegree:]) + copy(sibling.values[:pmaminDegree-1], n.values[pmaminDegree:]) + pmazeroValueSlice(n.values[pmaminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:pmaminDegree], n.children[pmaminDegree:]) + pmazeroNodeSlice(n.children[pmaminDegree:]) + for i := 0; i < pmaminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = pmaminDegree - 1 + + if pmatrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < pmaminDegree { + return gap + } + return pmaGapIterator{sibling, gap.index - pmaminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *pmanode) rebalanceAfterRemove(gap pmaGapIterator) pmaGapIterator { + for { + if n.nrSegments >= pmaminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= pmaminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + pmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if pmatrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return pmaGapIterator{n, 0} + } + if gap.node == n { + return pmaGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= pmaminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + pmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if pmatrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return pmaGapIterator{n, n.nrSegments} + } + return pmaGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return pmaGapIterator{p, gap.index} + } + if gap.node == right { + return pmaGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *pmanode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = pmaGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + pmaSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if pmatrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *pmanode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *pmanode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *pmanode) calculateMaxGapLeaf() __generics_imported0.Addr { + max := pmaGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (pmaGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *pmanode) calculateMaxGapInternal() __generics_imported0.Addr { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *pmanode) searchFirstLargeEnoughGap(minSize __generics_imported0.Addr) pmaGapIterator { + if n.maxGap.Get() < minSize { + return pmaGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := pmaGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *pmanode) searchLastLargeEnoughGap(minSize __generics_imported0.Addr) pmaGapIterator { + if n.maxGap.Get() < minSize { + return pmaGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := pmaGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type pmaIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *pmanode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg pmaIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg pmaIterator) Range() __generics_imported0.AddrRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg pmaIterator) Start() __generics_imported0.Addr { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg pmaIterator) End() __generics_imported0.Addr { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg pmaIterator) SetRangeUnchecked(r __generics_imported0.AddrRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg pmaIterator) SetRange(r __generics_imported0.AddrRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg pmaIterator) SetStartUnchecked(start __generics_imported0.Addr) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg pmaIterator) SetStart(start __generics_imported0.Addr) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg pmaIterator) SetEndUnchecked(end __generics_imported0.Addr) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg pmaIterator) SetEnd(end __generics_imported0.Addr) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg pmaIterator) Value() pma { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg pmaIterator) ValuePtr() *pma { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg pmaIterator) SetValue(val pma) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg pmaIterator) PrevSegment() pmaIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return pmaIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return pmaIterator{} + } + return pmasegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg pmaIterator) NextSegment() pmaIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return pmaIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return pmaIterator{} + } + return pmasegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg pmaIterator) PrevGap() pmaGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return pmaGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg pmaIterator) NextGap() pmaGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return pmaGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg pmaIterator) PrevNonEmpty() (pmaIterator, pmaGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return pmaIterator{}, gap + } + return gap.PrevSegment(), pmaGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg pmaIterator) NextNonEmpty() (pmaIterator, pmaGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return pmaIterator{}, gap + } + return gap.NextSegment(), pmaGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type pmaGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *pmanode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap pmaGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap pmaGapIterator) Range() __generics_imported0.AddrRange { + return __generics_imported0.AddrRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap pmaGapIterator) Start() __generics_imported0.Addr { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return pmaSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap pmaGapIterator) End() __generics_imported0.Addr { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return pmaSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap pmaGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap pmaGapIterator) PrevSegment() pmaIterator { + return pmasegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap pmaGapIterator) NextSegment() pmaIterator { + return pmasegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap pmaGapIterator) PrevGap() pmaGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return pmaGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap pmaGapIterator) NextGap() pmaGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return pmaGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap pmaGapIterator) NextLargeEnoughGap(minSize __generics_imported0.Addr) pmaGapIterator { + if pmatrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap pmaGapIterator) nextLargeEnoughGapHelper(minSize __generics_imported0.Addr) pmaGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return pmaGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap pmaGapIterator) PrevLargeEnoughGap(minSize __generics_imported0.Addr) pmaGapIterator { + if pmatrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap pmaGapIterator) prevLargeEnoughGapHelper(minSize __generics_imported0.Addr) pmaGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return pmaGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func pmasegmentBeforePosition(n *pmanode, i int) pmaIterator { + for i == 0 { + if n.parent == nil { + return pmaIterator{} + } + n, i = n.parent, n.parentIndex + } + return pmaIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func pmasegmentAfterPosition(n *pmanode, i int) pmaIterator { + for i == n.nrSegments { + if n.parent == nil { + return pmaIterator{} + } + n, i = n.parent, n.parentIndex + } + return pmaIterator{n, i} +} + +func pmazeroValueSlice(slice []pma) { + + for i := range slice { + pmaSetFunctions{}.ClearValue(&slice[i]) + } +} + +func pmazeroNodeSlice(slice []*pmanode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *pmaSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *pmanode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *pmanode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if pmatrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type pmaSegmentDataSlices struct { + Start []__generics_imported0.Addr + End []__generics_imported0.Addr + Values []pma +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *pmaSet) ExportSortedSlices() *pmaSegmentDataSlices { + var sds pmaSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *pmaSet) ImportSortedSlices(sds *pmaSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.AddrRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *pmaSet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.AddrRange, pma) error) error { + havePrev := false + prev := __generics_imported0.Addr(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *pmaSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *pmaSet) saveRoot() *pmaSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *pmaSet) loadRoot(sds *pmaSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/mm/special_mappable_refs.go b/pkg/sentry/mm/special_mappable_refs.go new file mode 100644 index 000000000..386a9fa3b --- /dev/null +++ b/pkg/sentry/mm/special_mappable_refs.go @@ -0,0 +1,140 @@ +package mm + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const SpecialMappableenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var SpecialMappableobj *SpecialMappable + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type SpecialMappableRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *SpecialMappableRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *SpecialMappableRefs) RefType() string { + return fmt.Sprintf("%T", SpecialMappableobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *SpecialMappableRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *SpecialMappableRefs) LogRefs() bool { + return SpecialMappableenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *SpecialMappableRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *SpecialMappableRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if SpecialMappableenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *SpecialMappableRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if SpecialMappableenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *SpecialMappableRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if SpecialMappableenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *SpecialMappableRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/mm/vma_set.go b/pkg/sentry/mm/vma_set.go new file mode 100644 index 000000000..298c86629 --- /dev/null +++ b/pkg/sentry/mm/vma_set.go @@ -0,0 +1,1647 @@ +package mm + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/hostarch" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const vmatrackGaps = 1 + +var _ = uint8(vmatrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type vmadynamicGap [vmatrackGaps]__generics_imported0.Addr + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *vmadynamicGap) Get() __generics_imported0.Addr { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *vmadynamicGap) Set(v __generics_imported0.Addr) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + vmaminDegree = 8 + + vmamaxDegree = 2 * vmaminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type vmaSet struct { + root vmanode `state:".(*vmaSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *vmaSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *vmaSet) IsEmptyRange(r __generics_imported0.AddrRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *vmaSet) Span() __generics_imported0.Addr { + var sz __generics_imported0.Addr + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *vmaSet) SpanRange(r __generics_imported0.AddrRange) __generics_imported0.Addr { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz __generics_imported0.Addr + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *vmaSet) FirstSegment() vmaIterator { + if s.root.nrSegments == 0 { + return vmaIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *vmaSet) LastSegment() vmaIterator { + if s.root.nrSegments == 0 { + return vmaIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *vmaSet) FirstGap() vmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return vmaGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *vmaSet) LastGap() vmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return vmaGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *vmaSet) Find(key __generics_imported0.Addr) (vmaIterator, vmaGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return vmaIterator{n, i}, vmaGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return vmaIterator{}, vmaGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *vmaSet) FindSegment(key __generics_imported0.Addr) vmaIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *vmaSet) LowerBoundSegment(min __generics_imported0.Addr) vmaIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *vmaSet) UpperBoundSegment(max __generics_imported0.Addr) vmaIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *vmaSet) FindGap(key __generics_imported0.Addr) vmaGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *vmaSet) LowerBoundGap(min __generics_imported0.Addr) vmaGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *vmaSet) UpperBoundGap(max __generics_imported0.Addr) vmaGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *vmaSet) Add(r __generics_imported0.AddrRange, val vma) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *vmaSet) AddWithoutMerging(r __generics_imported0.AddrRange, val vma) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *vmaSet) Insert(gap vmaGapIterator, r __generics_imported0.AddrRange, val vma) vmaIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (vmaSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := vmatrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (vmaSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (vmaSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := vmatrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *vmaSet) InsertWithoutMerging(gap vmaGapIterator, r __generics_imported0.AddrRange, val vma) vmaIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *vmaSet) InsertWithoutMergingUnchecked(gap vmaGapIterator, r __generics_imported0.AddrRange, val vma) vmaIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := vmatrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return vmaIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *vmaSet) Remove(seg vmaIterator) vmaGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if vmatrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + vmaSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if vmatrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(vmaGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *vmaSet) RemoveAll() { + s.root = vmanode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *vmaSet) RemoveRange(r __generics_imported0.AddrRange) vmaGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *vmaSet) Merge(first, second vmaIterator) vmaIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *vmaSet) MergeUnchecked(first, second vmaIterator) vmaIterator { + if first.End() == second.Start() { + if mval, ok := (vmaSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return vmaIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *vmaSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *vmaSet) MergeRange(r __generics_imported0.AddrRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *vmaSet) MergeAdjacent(r __generics_imported0.AddrRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *vmaSet) Split(seg vmaIterator, split __generics_imported0.Addr) (vmaIterator, vmaIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *vmaSet) SplitUnchecked(seg vmaIterator, split __generics_imported0.Addr) (vmaIterator, vmaIterator) { + val1, val2 := (vmaSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.AddrRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *vmaSet) SplitAt(split __generics_imported0.Addr) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *vmaSet) Isolate(seg vmaIterator, r __generics_imported0.AddrRange) vmaIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *vmaSet) ApplyContiguous(r __generics_imported0.AddrRange, fn func(seg vmaIterator)) vmaGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return vmaGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return vmaGapIterator{} + } + } +} + +// +stateify savable +type vmanode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *vmanode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap vmadynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [vmamaxDegree - 1]__generics_imported0.AddrRange + values [vmamaxDegree - 1]vma + children [vmamaxDegree]*vmanode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *vmanode) firstSegment() vmaIterator { + for n.hasChildren { + n = n.children[0] + } + return vmaIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *vmanode) lastSegment() vmaIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return vmaIterator{n, n.nrSegments - 1} +} + +func (n *vmanode) prevSibling() *vmanode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *vmanode) nextSibling() *vmanode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *vmanode) rebalanceBeforeInsert(gap vmaGapIterator) vmaGapIterator { + if n.nrSegments < vmamaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &vmanode{ + nrSegments: vmaminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &vmanode{ + nrSegments: vmaminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:vmaminDegree-1], n.keys[:vmaminDegree-1]) + copy(left.values[:vmaminDegree-1], n.values[:vmaminDegree-1]) + copy(right.keys[:vmaminDegree-1], n.keys[vmaminDegree:]) + copy(right.values[:vmaminDegree-1], n.values[vmaminDegree:]) + n.keys[0], n.values[0] = n.keys[vmaminDegree-1], n.values[vmaminDegree-1] + vmazeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:vmaminDegree], n.children[:vmaminDegree]) + copy(right.children[:vmaminDegree], n.children[vmaminDegree:]) + vmazeroNodeSlice(n.children[2:]) + for i := 0; i < vmaminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if vmatrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < vmaminDegree { + return vmaGapIterator{left, gap.index} + } + return vmaGapIterator{right, gap.index - vmaminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[vmaminDegree-1], n.values[vmaminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &vmanode{ + nrSegments: vmaminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:vmaminDegree-1], n.keys[vmaminDegree:]) + copy(sibling.values[:vmaminDegree-1], n.values[vmaminDegree:]) + vmazeroValueSlice(n.values[vmaminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:vmaminDegree], n.children[vmaminDegree:]) + vmazeroNodeSlice(n.children[vmaminDegree:]) + for i := 0; i < vmaminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = vmaminDegree - 1 + + if vmatrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < vmaminDegree { + return gap + } + return vmaGapIterator{sibling, gap.index - vmaminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *vmanode) rebalanceAfterRemove(gap vmaGapIterator) vmaGapIterator { + for { + if n.nrSegments >= vmaminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= vmaminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + vmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if vmatrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return vmaGapIterator{n, 0} + } + if gap.node == n { + return vmaGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= vmaminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + vmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if vmatrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return vmaGapIterator{n, n.nrSegments} + } + return vmaGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return vmaGapIterator{p, gap.index} + } + if gap.node == right { + return vmaGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *vmanode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = vmaGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + vmaSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if vmatrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *vmanode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *vmanode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *vmanode) calculateMaxGapLeaf() __generics_imported0.Addr { + max := vmaGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (vmaGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *vmanode) calculateMaxGapInternal() __generics_imported0.Addr { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *vmanode) searchFirstLargeEnoughGap(minSize __generics_imported0.Addr) vmaGapIterator { + if n.maxGap.Get() < minSize { + return vmaGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := vmaGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *vmanode) searchLastLargeEnoughGap(minSize __generics_imported0.Addr) vmaGapIterator { + if n.maxGap.Get() < minSize { + return vmaGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := vmaGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type vmaIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *vmanode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg vmaIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg vmaIterator) Range() __generics_imported0.AddrRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg vmaIterator) Start() __generics_imported0.Addr { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg vmaIterator) End() __generics_imported0.Addr { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg vmaIterator) SetRangeUnchecked(r __generics_imported0.AddrRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg vmaIterator) SetRange(r __generics_imported0.AddrRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg vmaIterator) SetStartUnchecked(start __generics_imported0.Addr) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg vmaIterator) SetStart(start __generics_imported0.Addr) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg vmaIterator) SetEndUnchecked(end __generics_imported0.Addr) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg vmaIterator) SetEnd(end __generics_imported0.Addr) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg vmaIterator) Value() vma { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg vmaIterator) ValuePtr() *vma { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg vmaIterator) SetValue(val vma) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg vmaIterator) PrevSegment() vmaIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return vmaIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return vmaIterator{} + } + return vmasegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg vmaIterator) NextSegment() vmaIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return vmaIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return vmaIterator{} + } + return vmasegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg vmaIterator) PrevGap() vmaGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return vmaGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg vmaIterator) NextGap() vmaGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return vmaGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg vmaIterator) PrevNonEmpty() (vmaIterator, vmaGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return vmaIterator{}, gap + } + return gap.PrevSegment(), vmaGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg vmaIterator) NextNonEmpty() (vmaIterator, vmaGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return vmaIterator{}, gap + } + return gap.NextSegment(), vmaGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type vmaGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *vmanode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap vmaGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap vmaGapIterator) Range() __generics_imported0.AddrRange { + return __generics_imported0.AddrRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap vmaGapIterator) Start() __generics_imported0.Addr { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return vmaSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap vmaGapIterator) End() __generics_imported0.Addr { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return vmaSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap vmaGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap vmaGapIterator) PrevSegment() vmaIterator { + return vmasegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap vmaGapIterator) NextSegment() vmaIterator { + return vmasegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap vmaGapIterator) PrevGap() vmaGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return vmaGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap vmaGapIterator) NextGap() vmaGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return vmaGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap vmaGapIterator) NextLargeEnoughGap(minSize __generics_imported0.Addr) vmaGapIterator { + if vmatrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap vmaGapIterator) nextLargeEnoughGapHelper(minSize __generics_imported0.Addr) vmaGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return vmaGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap vmaGapIterator) PrevLargeEnoughGap(minSize __generics_imported0.Addr) vmaGapIterator { + if vmatrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap vmaGapIterator) prevLargeEnoughGapHelper(minSize __generics_imported0.Addr) vmaGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return vmaGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func vmasegmentBeforePosition(n *vmanode, i int) vmaIterator { + for i == 0 { + if n.parent == nil { + return vmaIterator{} + } + n, i = n.parent, n.parentIndex + } + return vmaIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func vmasegmentAfterPosition(n *vmanode, i int) vmaIterator { + for i == n.nrSegments { + if n.parent == nil { + return vmaIterator{} + } + n, i = n.parent, n.parentIndex + } + return vmaIterator{n, i} +} + +func vmazeroValueSlice(slice []vma) { + + for i := range slice { + vmaSetFunctions{}.ClearValue(&slice[i]) + } +} + +func vmazeroNodeSlice(slice []*vmanode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *vmaSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *vmanode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *vmanode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if vmatrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type vmaSegmentDataSlices struct { + Start []__generics_imported0.Addr + End []__generics_imported0.Addr + Values []vma +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *vmaSet) ExportSortedSlices() *vmaSegmentDataSlices { + var sds vmaSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *vmaSet) ImportSortedSlices(sds *vmaSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.AddrRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *vmaSet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.AddrRange, vma) error) error { + havePrev := false + prev := __generics_imported0.Addr(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *vmaSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *vmaSet) saveRoot() *vmaSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *vmaSet) loadRoot(sds *vmaSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD deleted file mode 100644 index d351869ef..000000000 --- a/pkg/sentry/pgalloc/BUILD +++ /dev/null @@ -1,112 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "evictable_range", - out = "evictable_range.go", - package = "pgalloc", - prefix = "Evictable", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - -go_template_instance( - name = "evictable_range_set", - out = "evictable_range_set.go", - package = "pgalloc", - prefix = "evictableRange", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "EvictableRange", - "Value": "evictableRangeSetValue", - "Functions": "evictableRangeSetFunctions", - }, -) - -go_template_instance( - name = "usage_set", - out = "usage_set.go", - consts = { - "minDegree": "10", - "trackGaps": "1", - }, - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - }, - package = "pgalloc", - prefix = "usage", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.FileRange", - "Value": "usageInfo", - "Functions": "usageSetFunctions", - }, -) - -go_template_instance( - name = "reclaim_set", - out = "reclaim_set.go", - consts = { - "minDegree": "10", - }, - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - }, - package = "pgalloc", - prefix = "reclaim", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.FileRange", - "Value": "reclaimSetValue", - "Functions": "reclaimSetFunctions", - }, -) - -go_library( - name = "pgalloc", - srcs = [ - "context.go", - "evictable_range.go", - "evictable_range_set.go", - "pgalloc.go", - "pgalloc_unsafe.go", - "reclaim_set.go", - "save_restore.go", - "usage_set.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/memutil", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/hostmm", - "//pkg/sentry/memmap", - "//pkg/sentry/usage", - "//pkg/state", - "//pkg/state/wire", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "pgalloc_test", - size = "small", - srcs = ["pgalloc_test.go"], - library = ":pgalloc", - deps = ["//pkg/hostarch"], -) diff --git a/pkg/sentry/pgalloc/evictable_range.go b/pkg/sentry/pgalloc/evictable_range.go new file mode 100644 index 000000000..79f513ac2 --- /dev/null +++ b/pkg/sentry/pgalloc/evictable_range.go @@ -0,0 +1,76 @@ +package pgalloc + +// A Range represents a contiguous range of T. +// +// +stateify savable +type EvictableRange struct { + // Start is the inclusive start of the range. + Start uint64 + + // End is the exclusive end of the range. + End uint64 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +// +//go:nosplit +func (r EvictableRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +// +//go:nosplit +func (r EvictableRange) Length() uint64 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +// +//go:nosplit +func (r EvictableRange) Contains(x uint64) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +// +//go:nosplit +func (r EvictableRange) Overlaps(r2 EvictableRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +// +//go:nosplit +func (r EvictableRange) IsSupersetOf(r2 EvictableRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +// +//go:nosplit +func (r EvictableRange) Intersect(r2 EvictableRange) EvictableRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +// +//go:nosplit +func (r EvictableRange) CanSplitAt(x uint64) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/pgalloc/evictable_range_set.go b/pkg/sentry/pgalloc/evictable_range_set.go new file mode 100644 index 000000000..c0c712b21 --- /dev/null +++ b/pkg/sentry/pgalloc/evictable_range_set.go @@ -0,0 +1,1643 @@ +package pgalloc + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const evictableRangetrackGaps = 0 + +var _ = uint8(evictableRangetrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type evictableRangedynamicGap [evictableRangetrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *evictableRangedynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *evictableRangedynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + evictableRangeminDegree = 3 + + evictableRangemaxDegree = 2 * evictableRangeminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type evictableRangeSet struct { + root evictableRangenode `state:".(*evictableRangeSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *evictableRangeSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *evictableRangeSet) IsEmptyRange(r EvictableRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *evictableRangeSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *evictableRangeSet) SpanRange(r EvictableRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *evictableRangeSet) FirstSegment() evictableRangeIterator { + if s.root.nrSegments == 0 { + return evictableRangeIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *evictableRangeSet) LastSegment() evictableRangeIterator { + if s.root.nrSegments == 0 { + return evictableRangeIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *evictableRangeSet) FirstGap() evictableRangeGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return evictableRangeGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *evictableRangeSet) LastGap() evictableRangeGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return evictableRangeGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *evictableRangeSet) Find(key uint64) (evictableRangeIterator, evictableRangeGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return evictableRangeIterator{n, i}, evictableRangeGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return evictableRangeIterator{}, evictableRangeGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *evictableRangeSet) FindSegment(key uint64) evictableRangeIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *evictableRangeSet) LowerBoundSegment(min uint64) evictableRangeIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *evictableRangeSet) UpperBoundSegment(max uint64) evictableRangeIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *evictableRangeSet) FindGap(key uint64) evictableRangeGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *evictableRangeSet) LowerBoundGap(min uint64) evictableRangeGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *evictableRangeSet) UpperBoundGap(max uint64) evictableRangeGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *evictableRangeSet) Add(r EvictableRange, val evictableRangeSetValue) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *evictableRangeSet) AddWithoutMerging(r EvictableRange, val evictableRangeSetValue) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *evictableRangeSet) Insert(gap evictableRangeGapIterator, r EvictableRange, val evictableRangeSetValue) evictableRangeIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (evictableRangeSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := evictableRangetrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (evictableRangeSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (evictableRangeSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := evictableRangetrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *evictableRangeSet) InsertWithoutMerging(gap evictableRangeGapIterator, r EvictableRange, val evictableRangeSetValue) evictableRangeIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *evictableRangeSet) InsertWithoutMergingUnchecked(gap evictableRangeGapIterator, r EvictableRange, val evictableRangeSetValue) evictableRangeIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := evictableRangetrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return evictableRangeIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *evictableRangeSet) Remove(seg evictableRangeIterator) evictableRangeGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if evictableRangetrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + evictableRangeSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if evictableRangetrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(evictableRangeGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *evictableRangeSet) RemoveAll() { + s.root = evictableRangenode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *evictableRangeSet) RemoveRange(r EvictableRange) evictableRangeGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *evictableRangeSet) Merge(first, second evictableRangeIterator) evictableRangeIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *evictableRangeSet) MergeUnchecked(first, second evictableRangeIterator) evictableRangeIterator { + if first.End() == second.Start() { + if mval, ok := (evictableRangeSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return evictableRangeIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *evictableRangeSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *evictableRangeSet) MergeRange(r EvictableRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *evictableRangeSet) MergeAdjacent(r EvictableRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *evictableRangeSet) Split(seg evictableRangeIterator, split uint64) (evictableRangeIterator, evictableRangeIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *evictableRangeSet) SplitUnchecked(seg evictableRangeIterator, split uint64) (evictableRangeIterator, evictableRangeIterator) { + val1, val2 := (evictableRangeSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), EvictableRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *evictableRangeSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *evictableRangeSet) Isolate(seg evictableRangeIterator, r EvictableRange) evictableRangeIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *evictableRangeSet) ApplyContiguous(r EvictableRange, fn func(seg evictableRangeIterator)) evictableRangeGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return evictableRangeGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return evictableRangeGapIterator{} + } + } +} + +// +stateify savable +type evictableRangenode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *evictableRangenode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap evictableRangedynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [evictableRangemaxDegree - 1]EvictableRange + values [evictableRangemaxDegree - 1]evictableRangeSetValue + children [evictableRangemaxDegree]*evictableRangenode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *evictableRangenode) firstSegment() evictableRangeIterator { + for n.hasChildren { + n = n.children[0] + } + return evictableRangeIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *evictableRangenode) lastSegment() evictableRangeIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return evictableRangeIterator{n, n.nrSegments - 1} +} + +func (n *evictableRangenode) prevSibling() *evictableRangenode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *evictableRangenode) nextSibling() *evictableRangenode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *evictableRangenode) rebalanceBeforeInsert(gap evictableRangeGapIterator) evictableRangeGapIterator { + if n.nrSegments < evictableRangemaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &evictableRangenode{ + nrSegments: evictableRangeminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &evictableRangenode{ + nrSegments: evictableRangeminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:evictableRangeminDegree-1], n.keys[:evictableRangeminDegree-1]) + copy(left.values[:evictableRangeminDegree-1], n.values[:evictableRangeminDegree-1]) + copy(right.keys[:evictableRangeminDegree-1], n.keys[evictableRangeminDegree:]) + copy(right.values[:evictableRangeminDegree-1], n.values[evictableRangeminDegree:]) + n.keys[0], n.values[0] = n.keys[evictableRangeminDegree-1], n.values[evictableRangeminDegree-1] + evictableRangezeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:evictableRangeminDegree], n.children[:evictableRangeminDegree]) + copy(right.children[:evictableRangeminDegree], n.children[evictableRangeminDegree:]) + evictableRangezeroNodeSlice(n.children[2:]) + for i := 0; i < evictableRangeminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if evictableRangetrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < evictableRangeminDegree { + return evictableRangeGapIterator{left, gap.index} + } + return evictableRangeGapIterator{right, gap.index - evictableRangeminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[evictableRangeminDegree-1], n.values[evictableRangeminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &evictableRangenode{ + nrSegments: evictableRangeminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:evictableRangeminDegree-1], n.keys[evictableRangeminDegree:]) + copy(sibling.values[:evictableRangeminDegree-1], n.values[evictableRangeminDegree:]) + evictableRangezeroValueSlice(n.values[evictableRangeminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:evictableRangeminDegree], n.children[evictableRangeminDegree:]) + evictableRangezeroNodeSlice(n.children[evictableRangeminDegree:]) + for i := 0; i < evictableRangeminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = evictableRangeminDegree - 1 + + if evictableRangetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < evictableRangeminDegree { + return gap + } + return evictableRangeGapIterator{sibling, gap.index - evictableRangeminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *evictableRangenode) rebalanceAfterRemove(gap evictableRangeGapIterator) evictableRangeGapIterator { + for { + if n.nrSegments >= evictableRangeminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= evictableRangeminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + evictableRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if evictableRangetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return evictableRangeGapIterator{n, 0} + } + if gap.node == n { + return evictableRangeGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= evictableRangeminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + evictableRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if evictableRangetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return evictableRangeGapIterator{n, n.nrSegments} + } + return evictableRangeGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return evictableRangeGapIterator{p, gap.index} + } + if gap.node == right { + return evictableRangeGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *evictableRangenode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = evictableRangeGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + evictableRangeSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if evictableRangetrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *evictableRangenode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *evictableRangenode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *evictableRangenode) calculateMaxGapLeaf() uint64 { + max := evictableRangeGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (evictableRangeGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *evictableRangenode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *evictableRangenode) searchFirstLargeEnoughGap(minSize uint64) evictableRangeGapIterator { + if n.maxGap.Get() < minSize { + return evictableRangeGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := evictableRangeGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *evictableRangenode) searchLastLargeEnoughGap(minSize uint64) evictableRangeGapIterator { + if n.maxGap.Get() < minSize { + return evictableRangeGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := evictableRangeGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type evictableRangeIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *evictableRangenode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg evictableRangeIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg evictableRangeIterator) Range() EvictableRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg evictableRangeIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg evictableRangeIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg evictableRangeIterator) SetRangeUnchecked(r EvictableRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg evictableRangeIterator) SetRange(r EvictableRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg evictableRangeIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg evictableRangeIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg evictableRangeIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg evictableRangeIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg evictableRangeIterator) Value() evictableRangeSetValue { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg evictableRangeIterator) ValuePtr() *evictableRangeSetValue { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg evictableRangeIterator) SetValue(val evictableRangeSetValue) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg evictableRangeIterator) PrevSegment() evictableRangeIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return evictableRangeIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return evictableRangeIterator{} + } + return evictableRangesegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg evictableRangeIterator) NextSegment() evictableRangeIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return evictableRangeIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return evictableRangeIterator{} + } + return evictableRangesegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg evictableRangeIterator) PrevGap() evictableRangeGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return evictableRangeGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg evictableRangeIterator) NextGap() evictableRangeGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return evictableRangeGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg evictableRangeIterator) PrevNonEmpty() (evictableRangeIterator, evictableRangeGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return evictableRangeIterator{}, gap + } + return gap.PrevSegment(), evictableRangeGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg evictableRangeIterator) NextNonEmpty() (evictableRangeIterator, evictableRangeGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return evictableRangeIterator{}, gap + } + return gap.NextSegment(), evictableRangeGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type evictableRangeGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *evictableRangenode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap evictableRangeGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap evictableRangeGapIterator) Range() EvictableRange { + return EvictableRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap evictableRangeGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return evictableRangeSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap evictableRangeGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return evictableRangeSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap evictableRangeGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap evictableRangeGapIterator) PrevSegment() evictableRangeIterator { + return evictableRangesegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap evictableRangeGapIterator) NextSegment() evictableRangeIterator { + return evictableRangesegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap evictableRangeGapIterator) PrevGap() evictableRangeGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return evictableRangeGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap evictableRangeGapIterator) NextGap() evictableRangeGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return evictableRangeGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap evictableRangeGapIterator) NextLargeEnoughGap(minSize uint64) evictableRangeGapIterator { + if evictableRangetrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap evictableRangeGapIterator) nextLargeEnoughGapHelper(minSize uint64) evictableRangeGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return evictableRangeGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap evictableRangeGapIterator) PrevLargeEnoughGap(minSize uint64) evictableRangeGapIterator { + if evictableRangetrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap evictableRangeGapIterator) prevLargeEnoughGapHelper(minSize uint64) evictableRangeGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return evictableRangeGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func evictableRangesegmentBeforePosition(n *evictableRangenode, i int) evictableRangeIterator { + for i == 0 { + if n.parent == nil { + return evictableRangeIterator{} + } + n, i = n.parent, n.parentIndex + } + return evictableRangeIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func evictableRangesegmentAfterPosition(n *evictableRangenode, i int) evictableRangeIterator { + for i == n.nrSegments { + if n.parent == nil { + return evictableRangeIterator{} + } + n, i = n.parent, n.parentIndex + } + return evictableRangeIterator{n, i} +} + +func evictableRangezeroValueSlice(slice []evictableRangeSetValue) { + + for i := range slice { + evictableRangeSetFunctions{}.ClearValue(&slice[i]) + } +} + +func evictableRangezeroNodeSlice(slice []*evictableRangenode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *evictableRangeSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *evictableRangenode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *evictableRangenode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if evictableRangetrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type evictableRangeSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []evictableRangeSetValue +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *evictableRangeSet) ExportSortedSlices() *evictableRangeSegmentDataSlices { + var sds evictableRangeSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *evictableRangeSet) ImportSortedSlices(sds *evictableRangeSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := EvictableRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *evictableRangeSet) segmentTestCheck(expectedSegments int, segFunc func(int, EvictableRange, evictableRangeSetValue) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *evictableRangeSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *evictableRangeSet) saveRoot() *evictableRangeSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *evictableRangeSet) loadRoot(sds *evictableRangeSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/pgalloc/pgalloc_state_autogen.go b/pkg/sentry/pgalloc/pgalloc_state_autogen.go new file mode 100644 index 000000000..272186381 --- /dev/null +++ b/pkg/sentry/pgalloc/pgalloc_state_autogen.go @@ -0,0 +1,389 @@ +// automatically generated by stateify. + +package pgalloc + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *EvictableRange) StateTypeName() string { + return "pkg/sentry/pgalloc.EvictableRange" +} + +func (r *EvictableRange) StateFields() []string { + return []string{ + "Start", + "End", + } +} + +func (r *EvictableRange) beforeSave() {} + +// +checklocksignore +func (r *EvictableRange) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.Start) + stateSinkObject.Save(1, &r.End) +} + +func (r *EvictableRange) afterLoad() {} + +// +checklocksignore +func (r *EvictableRange) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.Start) + stateSourceObject.Load(1, &r.End) +} + +func (s *evictableRangeSet) StateTypeName() string { + return "pkg/sentry/pgalloc.evictableRangeSet" +} + +func (s *evictableRangeSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *evictableRangeSet) beforeSave() {} + +// +checklocksignore +func (s *evictableRangeSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *evictableRangeSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *evictableRangeSet) afterLoad() {} + +// +checklocksignore +func (s *evictableRangeSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*evictableRangeSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*evictableRangeSegmentDataSlices)) }) +} + +func (n *evictableRangenode) StateTypeName() string { + return "pkg/sentry/pgalloc.evictableRangenode" +} + +func (n *evictableRangenode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *evictableRangenode) beforeSave() {} + +// +checklocksignore +func (n *evictableRangenode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *evictableRangenode) afterLoad() {} + +// +checklocksignore +func (n *evictableRangenode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (e *evictableRangeSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/pgalloc.evictableRangeSegmentDataSlices" +} + +func (e *evictableRangeSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (e *evictableRangeSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (e *evictableRangeSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.Start) + stateSinkObject.Save(1, &e.End) + stateSinkObject.Save(2, &e.Values) +} + +func (e *evictableRangeSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (e *evictableRangeSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.Start) + stateSourceObject.Load(1, &e.End) + stateSourceObject.Load(2, &e.Values) +} + +func (u *usageInfo) StateTypeName() string { + return "pkg/sentry/pgalloc.usageInfo" +} + +func (u *usageInfo) StateFields() []string { + return []string{ + "kind", + "knownCommitted", + "refs", + } +} + +func (u *usageInfo) beforeSave() {} + +// +checklocksignore +func (u *usageInfo) StateSave(stateSinkObject state.Sink) { + u.beforeSave() + stateSinkObject.Save(0, &u.kind) + stateSinkObject.Save(1, &u.knownCommitted) + stateSinkObject.Save(2, &u.refs) +} + +func (u *usageInfo) afterLoad() {} + +// +checklocksignore +func (u *usageInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &u.kind) + stateSourceObject.Load(1, &u.knownCommitted) + stateSourceObject.Load(2, &u.refs) +} + +func (s *reclaimSet) StateTypeName() string { + return "pkg/sentry/pgalloc.reclaimSet" +} + +func (s *reclaimSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *reclaimSet) beforeSave() {} + +// +checklocksignore +func (s *reclaimSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *reclaimSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *reclaimSet) afterLoad() {} + +// +checklocksignore +func (s *reclaimSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*reclaimSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*reclaimSegmentDataSlices)) }) +} + +func (n *reclaimnode) StateTypeName() string { + return "pkg/sentry/pgalloc.reclaimnode" +} + +func (n *reclaimnode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *reclaimnode) beforeSave() {} + +// +checklocksignore +func (n *reclaimnode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *reclaimnode) afterLoad() {} + +// +checklocksignore +func (n *reclaimnode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (r *reclaimSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/pgalloc.reclaimSegmentDataSlices" +} + +func (r *reclaimSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (r *reclaimSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (r *reclaimSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.Start) + stateSinkObject.Save(1, &r.End) + stateSinkObject.Save(2, &r.Values) +} + +func (r *reclaimSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (r *reclaimSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.Start) + stateSourceObject.Load(1, &r.End) + stateSourceObject.Load(2, &r.Values) +} + +func (s *usageSet) StateTypeName() string { + return "pkg/sentry/pgalloc.usageSet" +} + +func (s *usageSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *usageSet) beforeSave() {} + +// +checklocksignore +func (s *usageSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *usageSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *usageSet) afterLoad() {} + +// +checklocksignore +func (s *usageSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*usageSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*usageSegmentDataSlices)) }) +} + +func (n *usagenode) StateTypeName() string { + return "pkg/sentry/pgalloc.usagenode" +} + +func (n *usagenode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *usagenode) beforeSave() {} + +// +checklocksignore +func (n *usagenode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *usagenode) afterLoad() {} + +// +checklocksignore +func (n *usagenode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (u *usageSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/pgalloc.usageSegmentDataSlices" +} + +func (u *usageSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (u *usageSegmentDataSlices) beforeSave() {} + +// +checklocksignore +func (u *usageSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + u.beforeSave() + stateSinkObject.Save(0, &u.Start) + stateSinkObject.Save(1, &u.End) + stateSinkObject.Save(2, &u.Values) +} + +func (u *usageSegmentDataSlices) afterLoad() {} + +// +checklocksignore +func (u *usageSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &u.Start) + stateSourceObject.Load(1, &u.End) + stateSourceObject.Load(2, &u.Values) +} + +func init() { + state.Register((*EvictableRange)(nil)) + state.Register((*evictableRangeSet)(nil)) + state.Register((*evictableRangenode)(nil)) + state.Register((*evictableRangeSegmentDataSlices)(nil)) + state.Register((*usageInfo)(nil)) + state.Register((*reclaimSet)(nil)) + state.Register((*reclaimnode)(nil)) + state.Register((*reclaimSegmentDataSlices)(nil)) + state.Register((*usageSet)(nil)) + state.Register((*usagenode)(nil)) + state.Register((*usageSegmentDataSlices)(nil)) +} diff --git a/pkg/sentry/pgalloc/pgalloc_test.go b/pkg/sentry/pgalloc/pgalloc_test.go deleted file mode 100644 index 8d2b7eb5e..000000000 --- a/pkg/sentry/pgalloc/pgalloc_test.go +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pgalloc - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/hostarch" -) - -const ( - page = hostarch.PageSize - hugepage = hostarch.HugePageSize - topPage = (1 << 63) - page -) - -func TestFindUnallocatedRange(t *testing.T) { - for _, test := range []struct { - desc string - usage *usageSegmentDataSlices - fileSize int64 - length uint64 - alignment uint64 - start uint64 - expectFail bool - }{ - { - desc: "Initial allocation succeeds", - usage: &usageSegmentDataSlices{}, - length: page, - alignment: page, - start: chunkSize - page, // Grows by chunkSize, allocate down. - }, - { - desc: "Allocation finds empty space at start of file", - usage: &usageSegmentDataSlices{ - Start: []uint64{page}, - End: []uint64{2 * page}, - Values: []usageInfo{{refs: 1}}, - }, - fileSize: 2 * page, - length: page, - alignment: page, - start: 0, - }, - { - desc: "Allocation finds empty space at end of file", - usage: &usageSegmentDataSlices{ - Start: []uint64{0}, - End: []uint64{page}, - Values: []usageInfo{{refs: 1}}, - }, - fileSize: 2 * page, - length: page, - alignment: page, - start: page, - }, - { - desc: "In-use frames are not allocatable", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, page}, - End: []uint64{page, 2 * page}, - Values: []usageInfo{{refs: 1}, {refs: 2}}, - }, - fileSize: 2 * page, - length: page, - alignment: page, - start: 3 * page, // Double fileSize, allocate top-down. - }, - { - desc: "Reclaimable frames are not allocatable", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, page, 2 * page}, - End: []uint64{page, 2 * page, 3 * page}, - Values: []usageInfo{{refs: 1}, {refs: 0}, {refs: 1}}, - }, - fileSize: 3 * page, - length: page, - alignment: page, - start: 5 * page, // Double fileSize, grow down. - }, - { - desc: "Gaps between in-use frames are allocatable", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, 2 * page}, - End: []uint64{page, 3 * page}, - Values: []usageInfo{{refs: 1}, {refs: 1}}, - }, - fileSize: 3 * page, - length: page, - alignment: page, - start: page, - }, - { - desc: "Inadequately-sized gaps are rejected", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, 2 * page}, - End: []uint64{page, 3 * page}, - Values: []usageInfo{{refs: 1}, {refs: 1}}, - }, - fileSize: 3 * page, - length: 2 * page, - alignment: page, - start: 4 * page, // Double fileSize, grow down. - }, - { - desc: "Alignment is honored at end of file", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, hugepage + page}, - // Hugepage-sized gap here that shouldn't be allocated from - // since it's incorrectly aligned. - End: []uint64{page, hugepage + 2*page}, - Values: []usageInfo{{refs: 1}, {refs: 1}}, - }, - fileSize: hugepage + 2*page, - length: hugepage, - alignment: hugepage, - start: 3 * hugepage, // Double fileSize until alignment is satisfied, grow down. - }, - { - desc: "Alignment is honored before end of file", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, 2*hugepage + page}, - // Page will need to be shifted down from top. - End: []uint64{page, 2*hugepage + 2*page}, - Values: []usageInfo{{refs: 1}, {refs: 1}}, - }, - fileSize: 2*hugepage + 2*page, - length: hugepage, - alignment: hugepage, - start: hugepage, - }, - { - desc: "Allocation doubles file size more than once if necessary", - usage: &usageSegmentDataSlices{}, - fileSize: page, - length: 4 * page, - alignment: page, - start: 0, - }, - { - desc: "Allocations are compact if possible", - usage: &usageSegmentDataSlices{ - Start: []uint64{page, 3 * page}, - End: []uint64{2 * page, 4 * page}, - Values: []usageInfo{{refs: 1}, {refs: 2}}, - }, - fileSize: 4 * page, - length: page, - alignment: page, - start: 2 * page, - }, - { - desc: "Top-down allocation within one gap", - usage: &usageSegmentDataSlices{ - Start: []uint64{page, 4 * page, 7 * page}, - End: []uint64{2 * page, 5 * page, 8 * page}, - Values: []usageInfo{{refs: 1}, {refs: 2}, {refs: 1}}, - }, - fileSize: 8 * page, - length: page, - alignment: page, - start: 6 * page, - }, - { - desc: "Top-down allocation between multiple gaps", - usage: &usageSegmentDataSlices{ - Start: []uint64{page, 3 * page, 5 * page}, - End: []uint64{2 * page, 4 * page, 6 * page}, - Values: []usageInfo{{refs: 1}, {refs: 2}, {refs: 1}}, - }, - fileSize: 6 * page, - length: page, - alignment: page, - start: 4 * page, - }, - { - desc: "Top-down allocation with large top gap", - usage: &usageSegmentDataSlices{ - Start: []uint64{page, 3 * page}, - End: []uint64{2 * page, 4 * page}, - Values: []usageInfo{{refs: 1}, {refs: 2}}, - }, - fileSize: 8 * page, - length: page, - alignment: page, - start: 7 * page, - }, - { - desc: "Gaps found with possible overflow", - usage: &usageSegmentDataSlices{ - Start: []uint64{page, topPage - page}, - End: []uint64{2 * page, topPage}, - Values: []usageInfo{{refs: 1}, {refs: 1}}, - }, - fileSize: topPage, - length: page, - alignment: page, - start: topPage - 2*page, - }, - { - desc: "Overflow detected", - usage: &usageSegmentDataSlices{ - Start: []uint64{page}, - End: []uint64{topPage}, - Values: []usageInfo{{refs: 1}}, - }, - fileSize: topPage, - length: 2 * page, - alignment: page, - expectFail: true, - }, - } { - t.Run(test.desc, func(t *testing.T) { - var usage usageSet - if err := usage.ImportSortedSlices(test.usage); err != nil { - t.Fatalf("Failed to initialize usage from %v: %v", test.usage, err) - } - fr, ok := findAvailableRange(&usage, test.fileSize, test.length, test.alignment) - if !test.expectFail && !ok { - t.Fatalf("findAvailableRange(%v, %x, %x, %x): got %x, false wanted %x, true", test.usage, test.fileSize, test.length, test.alignment, fr.Start, test.start) - } - if test.expectFail && ok { - t.Fatalf("findAvailableRange(%v, %x, %x, %x): got %x, true wanted %x, false", test.usage, test.fileSize, test.length, test.alignment, fr.Start, test.start) - } - if ok && fr.Start != test.start { - t.Errorf("findAvailableRange(%v, %x, %x, %x): got start=%x, wanted %x", test.usage, test.fileSize, test.length, test.alignment, fr.Start, test.start) - } - if ok && fr.End != test.start+test.length { - t.Errorf("findAvailableRange(%v, %x, %x, %x): got end=%x, wanted %x", test.usage, test.fileSize, test.length, test.alignment, fr.End, test.start+test.length) - } - }) - } -} diff --git a/pkg/sentry/pgalloc/pgalloc_unsafe_state_autogen.go b/pkg/sentry/pgalloc/pgalloc_unsafe_state_autogen.go new file mode 100644 index 000000000..87c214008 --- /dev/null +++ b/pkg/sentry/pgalloc/pgalloc_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package pgalloc diff --git a/pkg/sentry/pgalloc/reclaim_set.go b/pkg/sentry/pgalloc/reclaim_set.go new file mode 100644 index 000000000..737f38776 --- /dev/null +++ b/pkg/sentry/pgalloc/reclaim_set.go @@ -0,0 +1,1647 @@ +package pgalloc + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const reclaimtrackGaps = 0 + +var _ = uint8(reclaimtrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type reclaimdynamicGap [reclaimtrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *reclaimdynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *reclaimdynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + reclaimminDegree = 10 + + reclaimmaxDegree = 2 * reclaimminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type reclaimSet struct { + root reclaimnode `state:".(*reclaimSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *reclaimSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *reclaimSet) IsEmptyRange(r __generics_imported0.FileRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *reclaimSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *reclaimSet) SpanRange(r __generics_imported0.FileRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *reclaimSet) FirstSegment() reclaimIterator { + if s.root.nrSegments == 0 { + return reclaimIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *reclaimSet) LastSegment() reclaimIterator { + if s.root.nrSegments == 0 { + return reclaimIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *reclaimSet) FirstGap() reclaimGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return reclaimGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *reclaimSet) LastGap() reclaimGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return reclaimGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *reclaimSet) Find(key uint64) (reclaimIterator, reclaimGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return reclaimIterator{n, i}, reclaimGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return reclaimIterator{}, reclaimGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *reclaimSet) FindSegment(key uint64) reclaimIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *reclaimSet) LowerBoundSegment(min uint64) reclaimIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *reclaimSet) UpperBoundSegment(max uint64) reclaimIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *reclaimSet) FindGap(key uint64) reclaimGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *reclaimSet) LowerBoundGap(min uint64) reclaimGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *reclaimSet) UpperBoundGap(max uint64) reclaimGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *reclaimSet) Add(r __generics_imported0.FileRange, val reclaimSetValue) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *reclaimSet) AddWithoutMerging(r __generics_imported0.FileRange, val reclaimSetValue) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *reclaimSet) Insert(gap reclaimGapIterator, r __generics_imported0.FileRange, val reclaimSetValue) reclaimIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (reclaimSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := reclaimtrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (reclaimSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (reclaimSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := reclaimtrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *reclaimSet) InsertWithoutMerging(gap reclaimGapIterator, r __generics_imported0.FileRange, val reclaimSetValue) reclaimIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *reclaimSet) InsertWithoutMergingUnchecked(gap reclaimGapIterator, r __generics_imported0.FileRange, val reclaimSetValue) reclaimIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := reclaimtrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return reclaimIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *reclaimSet) Remove(seg reclaimIterator) reclaimGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if reclaimtrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + reclaimSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if reclaimtrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(reclaimGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *reclaimSet) RemoveAll() { + s.root = reclaimnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *reclaimSet) RemoveRange(r __generics_imported0.FileRange) reclaimGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *reclaimSet) Merge(first, second reclaimIterator) reclaimIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *reclaimSet) MergeUnchecked(first, second reclaimIterator) reclaimIterator { + if first.End() == second.Start() { + if mval, ok := (reclaimSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return reclaimIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *reclaimSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *reclaimSet) MergeRange(r __generics_imported0.FileRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *reclaimSet) MergeAdjacent(r __generics_imported0.FileRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *reclaimSet) Split(seg reclaimIterator, split uint64) (reclaimIterator, reclaimIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *reclaimSet) SplitUnchecked(seg reclaimIterator, split uint64) (reclaimIterator, reclaimIterator) { + val1, val2 := (reclaimSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.FileRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *reclaimSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *reclaimSet) Isolate(seg reclaimIterator, r __generics_imported0.FileRange) reclaimIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *reclaimSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg reclaimIterator)) reclaimGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return reclaimGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return reclaimGapIterator{} + } + } +} + +// +stateify savable +type reclaimnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *reclaimnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap reclaimdynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [reclaimmaxDegree - 1]__generics_imported0.FileRange + values [reclaimmaxDegree - 1]reclaimSetValue + children [reclaimmaxDegree]*reclaimnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *reclaimnode) firstSegment() reclaimIterator { + for n.hasChildren { + n = n.children[0] + } + return reclaimIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *reclaimnode) lastSegment() reclaimIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return reclaimIterator{n, n.nrSegments - 1} +} + +func (n *reclaimnode) prevSibling() *reclaimnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *reclaimnode) nextSibling() *reclaimnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *reclaimnode) rebalanceBeforeInsert(gap reclaimGapIterator) reclaimGapIterator { + if n.nrSegments < reclaimmaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &reclaimnode{ + nrSegments: reclaimminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &reclaimnode{ + nrSegments: reclaimminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:reclaimminDegree-1], n.keys[:reclaimminDegree-1]) + copy(left.values[:reclaimminDegree-1], n.values[:reclaimminDegree-1]) + copy(right.keys[:reclaimminDegree-1], n.keys[reclaimminDegree:]) + copy(right.values[:reclaimminDegree-1], n.values[reclaimminDegree:]) + n.keys[0], n.values[0] = n.keys[reclaimminDegree-1], n.values[reclaimminDegree-1] + reclaimzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:reclaimminDegree], n.children[:reclaimminDegree]) + copy(right.children[:reclaimminDegree], n.children[reclaimminDegree:]) + reclaimzeroNodeSlice(n.children[2:]) + for i := 0; i < reclaimminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if reclaimtrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < reclaimminDegree { + return reclaimGapIterator{left, gap.index} + } + return reclaimGapIterator{right, gap.index - reclaimminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[reclaimminDegree-1], n.values[reclaimminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &reclaimnode{ + nrSegments: reclaimminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:reclaimminDegree-1], n.keys[reclaimminDegree:]) + copy(sibling.values[:reclaimminDegree-1], n.values[reclaimminDegree:]) + reclaimzeroValueSlice(n.values[reclaimminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:reclaimminDegree], n.children[reclaimminDegree:]) + reclaimzeroNodeSlice(n.children[reclaimminDegree:]) + for i := 0; i < reclaimminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = reclaimminDegree - 1 + + if reclaimtrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < reclaimminDegree { + return gap + } + return reclaimGapIterator{sibling, gap.index - reclaimminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *reclaimnode) rebalanceAfterRemove(gap reclaimGapIterator) reclaimGapIterator { + for { + if n.nrSegments >= reclaimminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= reclaimminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + reclaimSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if reclaimtrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return reclaimGapIterator{n, 0} + } + if gap.node == n { + return reclaimGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= reclaimminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + reclaimSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if reclaimtrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return reclaimGapIterator{n, n.nrSegments} + } + return reclaimGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return reclaimGapIterator{p, gap.index} + } + if gap.node == right { + return reclaimGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *reclaimnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = reclaimGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + reclaimSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if reclaimtrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *reclaimnode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *reclaimnode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *reclaimnode) calculateMaxGapLeaf() uint64 { + max := reclaimGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (reclaimGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *reclaimnode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *reclaimnode) searchFirstLargeEnoughGap(minSize uint64) reclaimGapIterator { + if n.maxGap.Get() < minSize { + return reclaimGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := reclaimGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *reclaimnode) searchLastLargeEnoughGap(minSize uint64) reclaimGapIterator { + if n.maxGap.Get() < minSize { + return reclaimGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := reclaimGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type reclaimIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *reclaimnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg reclaimIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg reclaimIterator) Range() __generics_imported0.FileRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg reclaimIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg reclaimIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg reclaimIterator) SetRangeUnchecked(r __generics_imported0.FileRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg reclaimIterator) SetRange(r __generics_imported0.FileRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg reclaimIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg reclaimIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg reclaimIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg reclaimIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg reclaimIterator) Value() reclaimSetValue { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg reclaimIterator) ValuePtr() *reclaimSetValue { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg reclaimIterator) SetValue(val reclaimSetValue) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg reclaimIterator) PrevSegment() reclaimIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return reclaimIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return reclaimIterator{} + } + return reclaimsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg reclaimIterator) NextSegment() reclaimIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return reclaimIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return reclaimIterator{} + } + return reclaimsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg reclaimIterator) PrevGap() reclaimGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return reclaimGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg reclaimIterator) NextGap() reclaimGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return reclaimGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg reclaimIterator) PrevNonEmpty() (reclaimIterator, reclaimGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return reclaimIterator{}, gap + } + return gap.PrevSegment(), reclaimGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg reclaimIterator) NextNonEmpty() (reclaimIterator, reclaimGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return reclaimIterator{}, gap + } + return gap.NextSegment(), reclaimGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type reclaimGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *reclaimnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap reclaimGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap reclaimGapIterator) Range() __generics_imported0.FileRange { + return __generics_imported0.FileRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap reclaimGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return reclaimSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap reclaimGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return reclaimSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap reclaimGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap reclaimGapIterator) PrevSegment() reclaimIterator { + return reclaimsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap reclaimGapIterator) NextSegment() reclaimIterator { + return reclaimsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap reclaimGapIterator) PrevGap() reclaimGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return reclaimGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap reclaimGapIterator) NextGap() reclaimGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return reclaimGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap reclaimGapIterator) NextLargeEnoughGap(minSize uint64) reclaimGapIterator { + if reclaimtrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap reclaimGapIterator) nextLargeEnoughGapHelper(minSize uint64) reclaimGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return reclaimGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap reclaimGapIterator) PrevLargeEnoughGap(minSize uint64) reclaimGapIterator { + if reclaimtrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap reclaimGapIterator) prevLargeEnoughGapHelper(minSize uint64) reclaimGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return reclaimGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func reclaimsegmentBeforePosition(n *reclaimnode, i int) reclaimIterator { + for i == 0 { + if n.parent == nil { + return reclaimIterator{} + } + n, i = n.parent, n.parentIndex + } + return reclaimIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func reclaimsegmentAfterPosition(n *reclaimnode, i int) reclaimIterator { + for i == n.nrSegments { + if n.parent == nil { + return reclaimIterator{} + } + n, i = n.parent, n.parentIndex + } + return reclaimIterator{n, i} +} + +func reclaimzeroValueSlice(slice []reclaimSetValue) { + + for i := range slice { + reclaimSetFunctions{}.ClearValue(&slice[i]) + } +} + +func reclaimzeroNodeSlice(slice []*reclaimnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *reclaimSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *reclaimnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *reclaimnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if reclaimtrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type reclaimSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []reclaimSetValue +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *reclaimSet) ExportSortedSlices() *reclaimSegmentDataSlices { + var sds reclaimSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *reclaimSet) ImportSortedSlices(sds *reclaimSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.FileRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *reclaimSet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.FileRange, reclaimSetValue) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *reclaimSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *reclaimSet) saveRoot() *reclaimSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *reclaimSet) loadRoot(sds *reclaimSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/pgalloc/usage_set.go b/pkg/sentry/pgalloc/usage_set.go new file mode 100644 index 000000000..8d96e817a --- /dev/null +++ b/pkg/sentry/pgalloc/usage_set.go @@ -0,0 +1,1647 @@ +package pgalloc + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const usagetrackGaps = 1 + +var _ = uint8(usagetrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type usagedynamicGap [usagetrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *usagedynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *usagedynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + usageminDegree = 10 + + usagemaxDegree = 2 * usageminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type usageSet struct { + root usagenode `state:".(*usageSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *usageSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *usageSet) IsEmptyRange(r __generics_imported0.FileRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *usageSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *usageSet) SpanRange(r __generics_imported0.FileRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *usageSet) FirstSegment() usageIterator { + if s.root.nrSegments == 0 { + return usageIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *usageSet) LastSegment() usageIterator { + if s.root.nrSegments == 0 { + return usageIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *usageSet) FirstGap() usageGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return usageGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *usageSet) LastGap() usageGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return usageGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *usageSet) Find(key uint64) (usageIterator, usageGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return usageIterator{n, i}, usageGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return usageIterator{}, usageGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *usageSet) FindSegment(key uint64) usageIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *usageSet) LowerBoundSegment(min uint64) usageIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *usageSet) UpperBoundSegment(max uint64) usageIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *usageSet) FindGap(key uint64) usageGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *usageSet) LowerBoundGap(min uint64) usageGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *usageSet) UpperBoundGap(max uint64) usageGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *usageSet) Add(r __generics_imported0.FileRange, val usageInfo) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *usageSet) AddWithoutMerging(r __generics_imported0.FileRange, val usageInfo) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *usageSet) Insert(gap usageGapIterator, r __generics_imported0.FileRange, val usageInfo) usageIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (usageSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := usagetrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (usageSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (usageSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := usagetrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *usageSet) InsertWithoutMerging(gap usageGapIterator, r __generics_imported0.FileRange, val usageInfo) usageIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *usageSet) InsertWithoutMergingUnchecked(gap usageGapIterator, r __generics_imported0.FileRange, val usageInfo) usageIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := usagetrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return usageIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *usageSet) Remove(seg usageIterator) usageGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if usagetrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + usageSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if usagetrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(usageGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *usageSet) RemoveAll() { + s.root = usagenode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *usageSet) RemoveRange(r __generics_imported0.FileRange) usageGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *usageSet) Merge(first, second usageIterator) usageIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *usageSet) MergeUnchecked(first, second usageIterator) usageIterator { + if first.End() == second.Start() { + if mval, ok := (usageSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return usageIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *usageSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *usageSet) MergeRange(r __generics_imported0.FileRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *usageSet) MergeAdjacent(r __generics_imported0.FileRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *usageSet) Split(seg usageIterator, split uint64) (usageIterator, usageIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *usageSet) SplitUnchecked(seg usageIterator, split uint64) (usageIterator, usageIterator) { + val1, val2 := (usageSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.FileRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *usageSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *usageSet) Isolate(seg usageIterator, r __generics_imported0.FileRange) usageIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *usageSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg usageIterator)) usageGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return usageGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return usageGapIterator{} + } + } +} + +// +stateify savable +type usagenode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *usagenode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap usagedynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [usagemaxDegree - 1]__generics_imported0.FileRange + values [usagemaxDegree - 1]usageInfo + children [usagemaxDegree]*usagenode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *usagenode) firstSegment() usageIterator { + for n.hasChildren { + n = n.children[0] + } + return usageIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *usagenode) lastSegment() usageIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return usageIterator{n, n.nrSegments - 1} +} + +func (n *usagenode) prevSibling() *usagenode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *usagenode) nextSibling() *usagenode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *usagenode) rebalanceBeforeInsert(gap usageGapIterator) usageGapIterator { + if n.nrSegments < usagemaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &usagenode{ + nrSegments: usageminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &usagenode{ + nrSegments: usageminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:usageminDegree-1], n.keys[:usageminDegree-1]) + copy(left.values[:usageminDegree-1], n.values[:usageminDegree-1]) + copy(right.keys[:usageminDegree-1], n.keys[usageminDegree:]) + copy(right.values[:usageminDegree-1], n.values[usageminDegree:]) + n.keys[0], n.values[0] = n.keys[usageminDegree-1], n.values[usageminDegree-1] + usagezeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:usageminDegree], n.children[:usageminDegree]) + copy(right.children[:usageminDegree], n.children[usageminDegree:]) + usagezeroNodeSlice(n.children[2:]) + for i := 0; i < usageminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if usagetrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < usageminDegree { + return usageGapIterator{left, gap.index} + } + return usageGapIterator{right, gap.index - usageminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[usageminDegree-1], n.values[usageminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &usagenode{ + nrSegments: usageminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:usageminDegree-1], n.keys[usageminDegree:]) + copy(sibling.values[:usageminDegree-1], n.values[usageminDegree:]) + usagezeroValueSlice(n.values[usageminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:usageminDegree], n.children[usageminDegree:]) + usagezeroNodeSlice(n.children[usageminDegree:]) + for i := 0; i < usageminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = usageminDegree - 1 + + if usagetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < usageminDegree { + return gap + } + return usageGapIterator{sibling, gap.index - usageminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *usagenode) rebalanceAfterRemove(gap usageGapIterator) usageGapIterator { + for { + if n.nrSegments >= usageminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= usageminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + usageSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if usagetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return usageGapIterator{n, 0} + } + if gap.node == n { + return usageGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= usageminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + usageSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if usagetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return usageGapIterator{n, n.nrSegments} + } + return usageGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return usageGapIterator{p, gap.index} + } + if gap.node == right { + return usageGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *usagenode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = usageGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + usageSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if usagetrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *usagenode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *usagenode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *usagenode) calculateMaxGapLeaf() uint64 { + max := usageGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (usageGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *usagenode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *usagenode) searchFirstLargeEnoughGap(minSize uint64) usageGapIterator { + if n.maxGap.Get() < minSize { + return usageGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := usageGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *usagenode) searchLastLargeEnoughGap(minSize uint64) usageGapIterator { + if n.maxGap.Get() < minSize { + return usageGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := usageGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type usageIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *usagenode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg usageIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg usageIterator) Range() __generics_imported0.FileRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg usageIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg usageIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg usageIterator) SetRangeUnchecked(r __generics_imported0.FileRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg usageIterator) SetRange(r __generics_imported0.FileRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg usageIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg usageIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg usageIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg usageIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg usageIterator) Value() usageInfo { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg usageIterator) ValuePtr() *usageInfo { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg usageIterator) SetValue(val usageInfo) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg usageIterator) PrevSegment() usageIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return usageIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return usageIterator{} + } + return usagesegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg usageIterator) NextSegment() usageIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return usageIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return usageIterator{} + } + return usagesegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg usageIterator) PrevGap() usageGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return usageGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg usageIterator) NextGap() usageGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return usageGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg usageIterator) PrevNonEmpty() (usageIterator, usageGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return usageIterator{}, gap + } + return gap.PrevSegment(), usageGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg usageIterator) NextNonEmpty() (usageIterator, usageGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return usageIterator{}, gap + } + return gap.NextSegment(), usageGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type usageGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *usagenode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap usageGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap usageGapIterator) Range() __generics_imported0.FileRange { + return __generics_imported0.FileRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap usageGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return usageSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap usageGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return usageSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap usageGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap usageGapIterator) PrevSegment() usageIterator { + return usagesegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap usageGapIterator) NextSegment() usageIterator { + return usagesegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap usageGapIterator) PrevGap() usageGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return usageGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap usageGapIterator) NextGap() usageGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return usageGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap usageGapIterator) NextLargeEnoughGap(minSize uint64) usageGapIterator { + if usagetrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap usageGapIterator) nextLargeEnoughGapHelper(minSize uint64) usageGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return usageGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap usageGapIterator) PrevLargeEnoughGap(minSize uint64) usageGapIterator { + if usagetrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap usageGapIterator) prevLargeEnoughGapHelper(minSize uint64) usageGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return usageGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func usagesegmentBeforePosition(n *usagenode, i int) usageIterator { + for i == 0 { + if n.parent == nil { + return usageIterator{} + } + n, i = n.parent, n.parentIndex + } + return usageIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func usagesegmentAfterPosition(n *usagenode, i int) usageIterator { + for i == n.nrSegments { + if n.parent == nil { + return usageIterator{} + } + n, i = n.parent, n.parentIndex + } + return usageIterator{n, i} +} + +func usagezeroValueSlice(slice []usageInfo) { + + for i := range slice { + usageSetFunctions{}.ClearValue(&slice[i]) + } +} + +func usagezeroNodeSlice(slice []*usagenode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *usageSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *usagenode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *usagenode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if usagetrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type usageSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []usageInfo +} + +// ExportSortedSlices returns a copy of all segments in the given set, in +// ascending key order. +func (s *usageSet) ExportSortedSlices() *usageSegmentDataSlices { + var sds usageSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlices initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *usageSet) ImportSortedSlices(sds *usageSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.FileRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *usageSet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.FileRange, usageInfo) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *usageSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *usageSet) saveRoot() *usageSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *usageSet) loadRoot(sds *usageSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD deleted file mode 100644 index 7125657b3..000000000 --- a/pkg/sentry/platform/BUILD +++ /dev/null @@ -1,23 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "platform", - srcs = [ - "context.go", - "mmap_min_addr.go", - "platform.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/hostarch", - "//pkg/seccomp", - "//pkg/sentry/arch", - "//pkg/sentry/hostmm", - "//pkg/sentry/memmap", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/platform/interrupt/BUILD b/pkg/sentry/platform/interrupt/BUILD deleted file mode 100644 index 83b385f14..000000000 --- a/pkg/sentry/platform/interrupt/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "interrupt", - srcs = [ - "interrupt.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/sync"], -) - -go_test( - name = "interrupt_test", - size = "small", - srcs = ["interrupt_test.go"], - library = ":interrupt", -) diff --git a/pkg/sentry/platform/interrupt/interrupt_state_autogen.go b/pkg/sentry/platform/interrupt/interrupt_state_autogen.go new file mode 100644 index 000000000..1336e5f01 --- /dev/null +++ b/pkg/sentry/platform/interrupt/interrupt_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package interrupt diff --git a/pkg/sentry/platform/interrupt/interrupt_test.go b/pkg/sentry/platform/interrupt/interrupt_test.go deleted file mode 100644 index 0ecdf6e7a..000000000 --- a/pkg/sentry/platform/interrupt/interrupt_test.go +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package interrupt - -import ( - "testing" -) - -type countingReceiver struct { - interrupts int -} - -// NotifyInterrupt implements Receiver.NotifyInterrupt. -func (r *countingReceiver) NotifyInterrupt() { - r.interrupts++ -} - -func TestSingleInterruptBeforeEnable(t *testing.T) { - var ( - f Forwarder - r countingReceiver - ) - f.NotifyInterrupt() - // The interrupt should cause the first Enable to fail. - if f.Enable(&r) { - f.Disable() - t.Fatalf("Enable: got true, wanted false") - } - // The failing Enable "acknowledges" the interrupt, allowing future Enables - // to succeed. - if !f.Enable(&r) { - t.Fatalf("Enable: got false, wanted true") - } - f.Disable() -} - -func TestMultipleInterruptsBeforeEnable(t *testing.T) { - var ( - f Forwarder - r countingReceiver - ) - f.NotifyInterrupt() - f.NotifyInterrupt() - // The interrupts should cause the first Enable to fail. - if f.Enable(&r) { - f.Disable() - t.Fatalf("Enable: got true, wanted false") - } - // Interrupts are deduplicated while the Forwarder is disabled, so the - // failing Enable "acknowledges" all interrupts, allowing future Enables to - // succeed. - if !f.Enable(&r) { - t.Fatalf("Enable: got false, wanted true") - } - f.Disable() -} - -func TestSingleInterruptAfterEnable(t *testing.T) { - var ( - f Forwarder - r countingReceiver - ) - if !f.Enable(&r) { - t.Fatalf("Enable: got false, wanted true") - } - defer f.Disable() - f.NotifyInterrupt() - if r.interrupts != 1 { - t.Errorf("interrupts: got %d, wanted 1", r.interrupts) - } -} - -func TestMultipleInterruptsAfterEnable(t *testing.T) { - var ( - f Forwarder - r countingReceiver - ) - if !f.Enable(&r) { - t.Fatalf("Enable: got false, wanted true") - } - defer f.Disable() - f.NotifyInterrupt() - f.NotifyInterrupt() - if r.interrupts != 2 { - t.Errorf("interrupts: got %d, wanted 2", r.interrupts) - } -} diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD deleted file mode 100644 index 8a490b3de..000000000 --- a/pkg/sentry/platform/kvm/BUILD +++ /dev/null @@ -1,101 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "kvm", - srcs = [ - "address_space.go", - "address_space_amd64.go", - "address_space_arm64.go", - "bluepill.go", - "bluepill_allocator.go", - "bluepill_amd64.go", - "bluepill_amd64_unsafe.go", - "bluepill_arm64.go", - "bluepill_arm64.s", - "bluepill_arm64_unsafe.go", - "bluepill_fault.go", - "bluepill_impl_amd64.s", - "bluepill_unsafe.go", - "context.go", - "filters_amd64.go", - "filters_arm64.go", - "kvm.go", - "kvm_amd64.go", - "kvm_amd64_unsafe.go", - "kvm_arm64.go", - "kvm_arm64_unsafe.go", - "kvm_const.go", - "kvm_const_arm64.go", - "machine.go", - "machine_amd64.go", - "machine_amd64_unsafe.go", - "machine_arm64.go", - "machine_arm64_unsafe.go", - "machine_unsafe.go", - "physical_map.go", - "physical_map_amd64.go", - "physical_map_arm64.go", - "virtual_map.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/atomicbitops", - "//pkg/context", - "//pkg/cpuid", - "//pkg/hostarch", - "//pkg/log", - "//pkg/procid", - "//pkg/ring0", - "//pkg/ring0/pagetables", - "//pkg/safecopy", - "//pkg/seccomp", - "//pkg/sentry/arch", - "//pkg/sentry/arch/fpu", - "//pkg/sentry/memmap", - "//pkg/sentry/platform", - "//pkg/sentry/platform/interrupt", - "//pkg/sentry/time", - "//pkg/sync", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "kvm_test", - srcs = [ - "kvm_amd64_test.go", - "kvm_amd64_test.s", - "kvm_arm64_test.go", - "kvm_test.go", - "virtual_map_test.go", - ], - library = ":kvm", - tags = [ - "manual", - "nogotsan", - "requires-kvm", - ], - deps = [ - "//pkg/abi/linux", - "//pkg/hostarch", - "//pkg/ring0", - "//pkg/ring0/pagetables", - "//pkg/sentry/arch", - "//pkg/sentry/arch/fpu", - "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm/testutil", - "//pkg/sentry/time", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -genrule( - name = "bluepill_impl_amd64", - srcs = ["bluepill_amd64.s"], - outs = ["bluepill_impl_amd64.s"], - cmd = "(echo -e '// build +amd64\\n' && $(location //pkg/ring0/gen_offsets) && cat $(SRCS)) > $@", - tools = ["//pkg/ring0/gen_offsets"], -) diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.s b/pkg/sentry/platform/kvm/bluepill_impl_amd64.s index c2a1dca11..99f254342 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64.s +++ b/pkg/sentry/platform/kvm/bluepill_impl_amd64.s @@ -1,3 +1,73 @@ +// build +amd64 + +// Automatically generated, do not edit. + +// CPU offsets. +#define CPU_REGISTERS 0x28 +#define CPU_ERROR_CODE 0x10 +#define CPU_ERROR_TYPE 0x18 +#define CPU_ENTRY 0x20 + +// CPU entry offsets. +#define ENTRY_SCRATCH0 0x100 +#define ENTRY_STACK_TOP 0x108 +#define ENTRY_CPU_SELF 0x110 +#define ENTRY_KERNEL_CR3 0x118 + +// Bits. +#define _RFLAGS_IF 0x200 +#define _RFLAGS_IOPL0 0x1000 +#define _KERNEL_FLAGS 0x02 + +// Vectors. +#define DivideByZero 0x00 +#define Debug 0x01 +#define NMI 0x02 +#define Breakpoint 0x03 +#define Overflow 0x04 +#define BoundRangeExceeded 0x05 +#define InvalidOpcode 0x06 +#define DeviceNotAvailable 0x07 +#define DoubleFault 0x08 +#define CoprocessorSegmentOverrun 0x09 +#define InvalidTSS 0x0a +#define SegmentNotPresent 0x0b +#define StackSegmentFault 0x0c +#define GeneralProtectionFault 0x0d +#define PageFault 0x0e +#define X87FloatingPointException 0x10 +#define AlignmentCheck 0x11 +#define MachineCheck 0x12 +#define SIMDFloatingPointException 0x13 +#define VirtualizationException 0x14 +#define SecurityException 0x1e +#define SyscallInt80 0x80 +#define Syscall 0x100 + +// Ptrace registers. +#define PTRACE_R15 0x00 +#define PTRACE_R14 0x08 +#define PTRACE_R13 0x10 +#define PTRACE_R12 0x18 +#define PTRACE_RBP 0x20 +#define PTRACE_RBX 0x28 +#define PTRACE_R11 0x30 +#define PTRACE_R10 0x38 +#define PTRACE_R9 0x40 +#define PTRACE_R8 0x48 +#define PTRACE_RAX 0x50 +#define PTRACE_RCX 0x58 +#define PTRACE_RDX 0x60 +#define PTRACE_RSI 0x68 +#define PTRACE_RDI 0x70 +#define PTRACE_ORIGRAX 0x78 +#define PTRACE_RIP 0x80 +#define PTRACE_CS 0x88 +#define PTRACE_FLAGS 0x90 +#define PTRACE_RSP 0x98 +#define PTRACE_SS 0xa0 +#define PTRACE_FS_BASE 0xa8 +#define PTRACE_GS_BASE 0xb0 // Copyright 2018 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/pkg/sentry/platform/kvm/kvm_amd64_state_autogen.go b/pkg/sentry/platform/kvm/kvm_amd64_state_autogen.go new file mode 100644 index 000000000..1bc956971 --- /dev/null +++ b/pkg/sentry/platform/kvm/kvm_amd64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build amd64 && amd64 && amd64 +// +build amd64,amd64,amd64 + +package kvm diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.go b/pkg/sentry/platform/kvm/kvm_amd64_test.go deleted file mode 100644 index c3fbbdc75..000000000 --- a/pkg/sentry/platform/kvm/kvm_amd64_test.go +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build amd64 -// +build amd64 - -package kvm - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/ring0" - "gvisor.dev/gvisor/pkg/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" -) - -func TestSegments(t *testing.T) { - applicationTest(t, true, testutil.AddrOfTwiddleSegments(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - testutil.SetTestSegments(regs) - for { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err == platform.ErrContextInterrupt { - continue // Retry. - } else if err != nil { - t.Errorf("application segment check with full restore got unexpected error: %v", err) - } - if err := testutil.CheckTestSegments(regs); err != nil { - t.Errorf("application segment check with full restore failed: %v", err) - } - break // Done. - } - return false - }) -} - -// stmxcsr reads the MXCSR control and status register. -func stmxcsr(addr *uint32) - -func TestMXCSR(t *testing.T) { - applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - var si linux.SignalInfo - switchOpts := ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - FullRestore: true, - } - - const mxcsrControllMask = uint32(0x1f80) - mxcsrBefore := uint32(0) - mxcsrAfter := uint32(0) - stmxcsr(&mxcsrBefore) - if mxcsrBefore == 0 { - // goruntime sets mxcsr to 0x1f80 and it never changes - // the control configuration. - panic("mxcsr is zero") - } - switchOpts.FloatingPointState.SetMXCSR(0) - if _, err := c.SwitchToUser( - switchOpts, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if err != nil { - t.Errorf("application syscall failed: %v", err) - } - stmxcsr(&mxcsrAfter) - if mxcsrAfter&mxcsrControllMask != mxcsrBefore&mxcsrControllMask { - t.Errorf("mxcsr = %x (expected %x)", mxcsrBefore, mxcsrAfter) - } - return false - }) -} diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.s b/pkg/sentry/platform/kvm/kvm_amd64_test.s deleted file mode 100644 index 8e9079867..000000000 --- a/pkg/sentry/platform/kvm/kvm_amd64_test.s +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2021 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "textflag.h" - -// stmxcsr reads the MXCSR control and status register. -TEXT ·stmxcsr(SB),NOSPLIT,$0-8 - MOVQ addr+0(FP), SI - STMXCSR (SI) - RET diff --git a/pkg/sentry/platform/kvm/kvm_amd64_unsafe_state_autogen.go b/pkg/sentry/platform/kvm/kvm_amd64_unsafe_state_autogen.go new file mode 100644 index 000000000..1bc956971 --- /dev/null +++ b/pkg/sentry/platform/kvm/kvm_amd64_unsafe_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build amd64 && amd64 && amd64 +// +build amd64,amd64,amd64 + +package kvm diff --git a/pkg/sentry/platform/kvm/kvm_arm64_state_autogen.go b/pkg/sentry/platform/kvm/kvm_arm64_state_autogen.go new file mode 100644 index 000000000..fc675a238 --- /dev/null +++ b/pkg/sentry/platform/kvm/kvm_arm64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 && arm64 && arm64 +// +build arm64,arm64,arm64 + +package kvm diff --git a/pkg/sentry/platform/kvm/kvm_arm64_test.go b/pkg/sentry/platform/kvm/kvm_arm64_test.go deleted file mode 100644 index b53e354da..000000000 --- a/pkg/sentry/platform/kvm/kvm_arm64_test.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build arm64 -// +build arm64 - -package kvm - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" -) - -func TestKernelTLS(t *testing.T) { - bluepillTest(t, func(c *vCPU) { - if !testutil.TLSWorks() { - t.Errorf("tls does not work, and it should!") - } - }) -} diff --git a/pkg/sentry/platform/kvm/kvm_arm64_unsafe_state_autogen.go b/pkg/sentry/platform/kvm/kvm_arm64_unsafe_state_autogen.go new file mode 100644 index 000000000..fc675a238 --- /dev/null +++ b/pkg/sentry/platform/kvm/kvm_arm64_unsafe_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 && arm64 && arm64 +// +build arm64,arm64,arm64 + +package kvm diff --git a/pkg/sentry/platform/kvm/kvm_state_autogen.go b/pkg/sentry/platform/kvm/kvm_state_autogen.go new file mode 100644 index 000000000..8d85b96d0 --- /dev/null +++ b/pkg/sentry/platform/kvm/kvm_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package kvm diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go deleted file mode 100644 index 3a30286e2..000000000 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ /dev/null @@ -1,529 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kvm - -import ( - "math/rand" - "reflect" - "sync/atomic" - "testing" - "time" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/ring0" - "gvisor.dev/gvisor/pkg/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/arch/fpu" - "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" - ktime "gvisor.dev/gvisor/pkg/sentry/time" -) - -var dummyFPState = fpu.NewState() - -type testHarness interface { - Errorf(format string, args ...interface{}) - Fatalf(format string, args ...interface{}) -} - -func kvmTest(t testHarness, setup func(*KVM), fn func(*vCPU) bool) { - // Create the machine. - deviceFile, err := OpenDevice() - if err != nil { - t.Fatalf("error opening device file: %v", err) - } - k, err := New(deviceFile) - if err != nil { - t.Fatalf("error creating KVM instance: %v", err) - } - defer k.machine.Destroy() - - // Call additional setup. - if setup != nil { - setup(k) - } - - var c *vCPU // For recovery. - defer func() { - redpill() - if c != nil { - k.machine.Put(c) - } - }() - for { - c = k.machine.Get() - if !fn(c) { - break - } - - // We put the vCPU here and clear the value so that the - // deferred recovery will not re-put it above. - k.machine.Put(c) - c = nil - } -} - -func bluepillTest(t testHarness, fn func(*vCPU)) { - kvmTest(t, nil, func(c *vCPU) bool { - bluepill(c) - fn(c) - return false - }) -} - -func TestKernelSyscall(t *testing.T) { - bluepillTest(t, func(c *vCPU) { - redpill() // Leave guest mode. - if got := atomic.LoadUint32(&c.state); got != vCPUUser { - t.Errorf("vCPU not in ready state: got %v", got) - } - }) -} - -func hostFault() { - defer func() { - recover() - }() - var foo *int - *foo = 0 -} - -func TestKernelFault(t *testing.T) { - hostFault() // Ensure recovery works. - bluepillTest(t, func(c *vCPU) { - hostFault() - if got := atomic.LoadUint32(&c.state); got != vCPUUser { - t.Errorf("vCPU not in ready state: got %v", got) - } - }) -} - -func TestKernelFloatingPoint(t *testing.T) { - bluepillTest(t, func(c *vCPU) { - if !testutil.FloatingPointWorks() { - t.Errorf("floating point does not work, and it should!") - } - }) -} - -func applicationTest(t testHarness, useHostMappings bool, targetFn uintptr, fn func(*vCPU, *arch.Registers, *pagetables.PageTables) bool) { - // Initialize registers & page tables. - var ( - regs arch.Registers - pt *pagetables.PageTables - ) - testutil.SetTestTarget(®s, targetFn) - - kvmTest(t, func(k *KVM) { - // Create new page tables. - as, _, err := k.NewAddressSpace(nil /* invalidator */) - if err != nil { - t.Fatalf("can't create new address space: %v", err) - } - pt = as.(*addressSpace).pageTables - - if useHostMappings { - // Apply the physical mappings to these page tables. - // (This is normally dangerous, since they point to - // physical pages that may not exist. This shouldn't be - // done for regular user code, but is fine for test - // purposes.) - applyPhysicalRegions(func(pr physicalRegion) bool { - pt.Map(hostarch.Addr(pr.virtual), pr.length, pagetables.MapOpts{ - AccessType: hostarch.AnyAccess, - User: true, - }, pr.physical) - return true // Keep iterating. - }) - } - }, func(c *vCPU) bool { - // Invoke the function with the extra data. - return fn(c, ®s, pt) - }) -} - -func TestApplicationSyscall(t *testing.T) { - applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if err != nil { - t.Errorf("application syscall with full restore failed: %v", err) - } - return false - }) - applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if err != nil { - t.Errorf("application syscall with partial restore failed: %v", err) - } - return false - }) -} - -func TestApplicationFault(t *testing.T) { - applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - testutil.SetTouchTarget(regs, nil) // Cause fault. - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if err != platform.ErrContextSignal || si.Signo != int32(unix.SIGSEGV) { - t.Errorf("application fault with full restore got (%v, %v), expected (%v, SIGSEGV)", err, si, platform.ErrContextSignal) - } - return false - }) - applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - testutil.SetTouchTarget(regs, nil) // Cause fault. - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if err != platform.ErrContextSignal || si.Signo != int32(unix.SIGSEGV) { - t.Errorf("application fault with partial restore got (%v, %v), expected (%v, SIGSEGV)", err, si, platform.ErrContextSignal) - } - return false - }) -} - -func TestRegistersSyscall(t *testing.T) { - applicationTest(t, true, testutil.AddrOfTwiddleRegsSyscall(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - testutil.SetTestRegs(regs) // Fill values for all registers. - for { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - continue // Retry. - } else if err != nil { - t.Errorf("application register check with partial restore got unexpected error: %v", err) - } - if err := testutil.CheckTestRegs(regs, false); err != nil { - t.Errorf("application register check with partial restore failed: %v", err) - } - break // Done. - } - return false - }) -} - -func TestRegistersFault(t *testing.T) { - applicationTest(t, true, testutil.AddrOfTwiddleRegsFault(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - testutil.SetTestRegs(regs) // Fill values for all registers. - for { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err == platform.ErrContextInterrupt { - continue // Retry. - } else if err != platform.ErrContextSignal || si.Signo != int32(unix.SIGSEGV) { - t.Errorf("application register check with full restore got unexpected error: %v", err) - } - if err := testutil.CheckTestRegs(regs, true); err != nil { - t.Errorf("application register check with full restore failed: %v", err) - } - break // Done. - } - return false - }) -} - -func TestBounce(t *testing.T) { - applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - go func() { - time.Sleep(time.Millisecond) - c.BounceToKernel() - }() - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - }, &si); err != platform.ErrContextInterrupt { - t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) - } - return false - }) - applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - go func() { - time.Sleep(time.Millisecond) - c.BounceToKernel() - }() - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err != platform.ErrContextInterrupt { - t.Errorf("application full restore: got %v, wanted %v", err, platform.ErrContextInterrupt) - } - return false - }) -} - -func TestBounceStress(t *testing.T) { - applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - randomSleep := func() { - // O(hundreds of microseconds) is appropriate to ensure - // different overlaps and different schedules. - if n := rand.Intn(1000); n > 100 { - time.Sleep(time.Duration(n) * time.Microsecond) - } - } - for i := 0; i < 1000; i++ { - // Start an asynchronously executing goroutine that - // calls Bounce at pseudo-random point in time. - // This should wind up calling Bounce when the - // kernel is in various stages of the switch. - go func() { - randomSleep() - c.BounceToKernel() - }() - randomSleep() - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - }, &si); err != platform.ErrContextInterrupt { - t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) - } - c.unlock() - randomSleep() - c.lock() - } - return false - }) -} - -func TestInvalidate(t *testing.T) { - var data uintptr // Used below. - applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - testutil.SetTouchTarget(regs, &data) // Read legitimate value. - for { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - continue // Retry. - } else if err != nil { - t.Errorf("application partial restore: got %v, wanted nil", err) - } - break // Done. - } - // Unmap the page containing data & invalidate. - pt.Unmap(hostarch.Addr(reflect.ValueOf(&data).Pointer() & ^uintptr(hostarch.PageSize-1)), hostarch.PageSize) - for { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - Flush: true, - }, &si); err == platform.ErrContextInterrupt { - continue // Retry. - } else if err != platform.ErrContextSignal { - t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextSignal) - } - break // Success. - } - return false - }) -} - -// IsFault returns true iff the given signal represents a fault. -func IsFault(err error, si *linux.SignalInfo) bool { - return err == platform.ErrContextSignal && si.Signo == int32(unix.SIGSEGV) -} - -func TestEmptyAddressSpace(t *testing.T) { - applicationTest(t, false, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if !IsFault(err, &si) { - t.Errorf("first fault with partial restore failed got %v", err) - t.Logf("registers: %#v", ®s) - } - return false - }) - applicationTest(t, false, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if !IsFault(err, &si) { - t.Errorf("first fault with full restore failed got %v", err) - t.Logf("registers: %#v", ®s) - } - return false - }) -} - -func TestWrongVCPU(t *testing.T) { - kvmTest(t, nil, func(c1 *vCPU) bool { - kvmTest(t, nil, func(c2 *vCPU) bool { - // Basic test, one then the other. - bluepill(c1) - bluepill(c2) - if c1.guestExits == 0 { - // Check: vCPU1 will exit due to redpill() in bluepill(c2). - // Don't allow the test to proceed if this fails. - t.Fatalf("wrong vCPU#1 exits: vCPU1=%+v,vCPU2=%+v", c1, c2) - } - - // Alternate vCPUs; we expect to need to trigger the - // wrong vCPU path on each switch. - for i := 0; i < 100; i++ { - bluepill(c1) - bluepill(c2) - } - if count := c1.guestExits; count < 90 { - t.Errorf("wrong vCPU#1 exits: vCPU1=%+v,vCPU2=%+v", c1, c2) - } - if count := c2.guestExits; count < 90 { - t.Errorf("wrong vCPU#2 exits: vCPU1=%+v,vCPU2=%+v", c1, c2) - } - return false - }) - return false - }) - kvmTest(t, nil, func(c1 *vCPU) bool { - kvmTest(t, nil, func(c2 *vCPU) bool { - bluepill(c1) - bluepill(c2) - return false - }) - return false - }) -} - -func TestRdtsc(t *testing.T) { - var i int // Iteration count. - kvmTest(t, nil, func(c *vCPU) bool { - start := ktime.Rdtsc() - bluepill(c) - guest := ktime.Rdtsc() - redpill() - end := ktime.Rdtsc() - if start > guest || guest > end { - t.Errorf("inconsistent time: start=%d, guest=%d, end=%d", start, guest, end) - } - i++ - return i < 100 - }) -} - -func BenchmarkApplicationSyscall(b *testing.B) { - var ( - i int // Iteration includes machine.Get() / machine.Put(). - a int // Count for ErrContextInterrupt. - ) - applicationTest(b, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - a++ - return true // Ignore. - } else if err != nil { - b.Fatalf("benchmark failed: %v", err) - } - i++ - return i < b.N - }) - if a != 0 { - b.Logf("ErrContextInterrupt occurred %d times (in %d iterations).", a, a+i) - } -} - -func BenchmarkKernelSyscall(b *testing.B) { - // Note that the target passed here is irrelevant, we never execute SwitchToUser. - applicationTest(b, true, testutil.AddrOfGetpid(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - // iteration does not include machine.Get() / machine.Put(). - for i := 0; i < b.N; i++ { - testutil.Getpid() - } - return false - }) -} - -func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) { - // see BenchmarkApplicationSyscall. - var ( - i int - a int - ) - applicationTest(b, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { - var si linux.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: &dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - a++ - return true // Ignore. - } else if err != nil { - b.Fatalf("benchmark failed: %v", err) - } - // This will intentionally cause the world switch. By executing - // a host syscall here, we force the transition between guest - // and host mode. - testutil.Getpid() - i++ - return i < b.N - }) - if a != 0 { - b.Logf("ErrContextInterrupt occurred %d times (in %d iterations).", a, a+i) - } -} diff --git a/pkg/sentry/platform/kvm/kvm_unsafe_state_autogen.go b/pkg/sentry/platform/kvm/kvm_unsafe_state_autogen.go new file mode 100644 index 000000000..fbf940222 --- /dev/null +++ b/pkg/sentry/platform/kvm/kvm_unsafe_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build go1.12 && !go1.18 && go1.12 && !go1.18 +// +build go1.12,!go1.18,go1.12,!go1.18 + +package kvm diff --git a/pkg/sentry/platform/kvm/testutil/BUILD b/pkg/sentry/platform/kvm/testutil/BUILD deleted file mode 100644 index f7feb8683..000000000 --- a/pkg/sentry/platform/kvm/testutil/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "testutil", - testonly = 1, - srcs = [ - "testutil.go", - "testutil_amd64.go", - "testutil_amd64.s", - "testutil_arm64.go", - "testutil_arm64.s", - ], - visibility = ["//pkg/sentry/platform/kvm:__pkg__"], - deps = ["//pkg/sentry/arch"], -) diff --git a/pkg/sentry/platform/kvm/testutil/testutil.go b/pkg/sentry/platform/kvm/testutil/testutil.go deleted file mode 100644 index d8c273796..000000000 --- a/pkg/sentry/platform/kvm/testutil/testutil.go +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package testutil provides common assembly stubs for testing. -package testutil - -import ( - "fmt" - "strings" -) - -// Getpid executes a trivial system call. -func Getpid() - -// AddrOfGetpid returns the address of Getpid. -// -// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal -// wrapper function rather than the function itself. We must reference from -// assembly to get the ABI0 (i.e., primary) address. -func AddrOfGetpid() uintptr - -// AddrOfTouch returns the address of a function that touches the value in the -// first register. -func AddrOfTouch() uintptr -func touch() - -// AddrOfSyscallLoop returns the address of a function that executes a syscall -// and loops. -func AddrOfSyscallLoop() uintptr -func syscallLoop() - -// AddrOfSpinLoop returns the address of a function that spins on the CPU. -func AddrOfSpinLoop() uintptr -func spinLoop() - -// AddrOfHaltLoop returns the address of a function that immediately halts and -// loops. -func AddrOfHaltLoop() uintptr -func haltLoop() - -// AddrOfTwiddleRegsFault returns the address of a function that twiddles -// registers then faults. -func AddrOfTwiddleRegsFault() uintptr -func twiddleRegsFault() - -// AddrOfTwiddleRegsSyscall returns the address of a function that twiddles -// registers then executes a syscall. -func AddrOfTwiddleRegsSyscall() uintptr -func twiddleRegsSyscall() - -// FloatingPointWorks is a floating point test. -// -// It returns true or false. -func FloatingPointWorks() bool - -// RegisterMismatchError is used for checking registers. -type RegisterMismatchError []string - -// Error returns a human-readable error. -func (r RegisterMismatchError) Error() string { - return strings.Join([]string(r), ";") -} - -// addRegisterMisatch allows simple chaining of register mismatches. -func addRegisterMismatch(err error, reg string, got, expected interface{}) error { - errStr := fmt.Sprintf("%s got %08x, expected %08x", reg, got, expected) - switch r := err.(type) { - case nil: - // Return a new register mismatch. - return RegisterMismatchError{errStr} - case RegisterMismatchError: - // Append the error. - r = append(r, errStr) - return r - default: - // Leave as is. - return err - } -} diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go deleted file mode 100644 index 98c52b2f5..000000000 --- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build amd64 -// +build amd64 - -package testutil - -import ( - "reflect" - - "gvisor.dev/gvisor/pkg/sentry/arch" -) - -// AddrOfTwiddleSegments return the address of a function that reads segments -// into known registers. -func AddrOfTwiddleSegments() uintptr -func twiddleSegments() - -// SetTestTarget sets the rip appropriately. -func SetTestTarget(regs *arch.Registers, fn uintptr) { - regs.Rip = uint64(fn) -} - -// SetTouchTarget sets rax appropriately. -func SetTouchTarget(regs *arch.Registers, target *uintptr) { - if target != nil { - regs.Rax = uint64(reflect.ValueOf(target).Pointer()) - } else { - regs.Rax = 0 - } -} - -// RewindSyscall rewinds a syscall RIP. -func RewindSyscall(regs *arch.Registers) { - regs.Rip -= 2 -} - -// SetTestRegs initializes registers to known values. -func SetTestRegs(regs *arch.Registers) { - regs.R15 = 0x15 - regs.R14 = 0x14 - regs.R13 = 0x13 - regs.R12 = 0x12 - regs.Rbp = 0xb9 - regs.Rbx = 0xb4 - regs.R11 = 0x11 - regs.R10 = 0x10 - regs.R9 = 0x09 - regs.R8 = 0x08 - regs.Rax = 0x44 - regs.Rcx = 0xc4 - regs.Rdx = 0xd4 - regs.Rsi = 0x51 - regs.Rdi = 0xd1 - regs.Rsp = 0x59 -} - -// CheckTestRegs checks that registers were twiddled per TwiddleRegs. -func CheckTestRegs(regs *arch.Registers, full bool) (err error) { - if need := ^uint64(0x15); regs.R15 != need { - err = addRegisterMismatch(err, "R15", regs.R15, need) - } - if need := ^uint64(0x14); regs.R14 != need { - err = addRegisterMismatch(err, "R14", regs.R14, need) - } - if need := ^uint64(0x13); regs.R13 != need { - err = addRegisterMismatch(err, "R13", regs.R13, need) - } - if need := ^uint64(0x12); regs.R12 != need { - err = addRegisterMismatch(err, "R12", regs.R12, need) - } - if need := ^uint64(0xb9); regs.Rbp != need { - err = addRegisterMismatch(err, "Rbp", regs.Rbp, need) - } - if need := ^uint64(0xb4); regs.Rbx != need { - err = addRegisterMismatch(err, "Rbx", regs.Rbx, need) - } - if need := ^uint64(0x10); regs.R10 != need { - err = addRegisterMismatch(err, "R10", regs.R10, need) - } - if need := ^uint64(0x09); regs.R9 != need { - err = addRegisterMismatch(err, "R9", regs.R9, need) - } - if need := ^uint64(0x08); regs.R8 != need { - err = addRegisterMismatch(err, "R8", regs.R8, need) - } - if need := ^uint64(0x44); regs.Rax != need { - err = addRegisterMismatch(err, "Rax", regs.Rax, need) - } - if need := ^uint64(0xd4); regs.Rdx != need { - err = addRegisterMismatch(err, "Rdx", regs.Rdx, need) - } - if need := ^uint64(0x51); regs.Rsi != need { - err = addRegisterMismatch(err, "Rsi", regs.Rsi, need) - } - if need := ^uint64(0xd1); regs.Rdi != need { - err = addRegisterMismatch(err, "Rdi", regs.Rdi, need) - } - if need := ^uint64(0x59); regs.Rsp != need { - err = addRegisterMismatch(err, "Rsp", regs.Rsp, need) - } - // Rcx & R11 are ignored if !full is set. - if need := ^uint64(0x11); full && regs.R11 != need { - err = addRegisterMismatch(err, "R11", regs.R11, need) - } - if need := ^uint64(0xc4); full && regs.Rcx != need { - err = addRegisterMismatch(err, "Rcx", regs.Rcx, need) - } - return -} - -var fsData uint64 = 0x55 -var gsData uint64 = 0x85 - -// SetTestSegments initializes segments to known values. -func SetTestSegments(regs *arch.Registers) { - regs.Fs_base = uint64(reflect.ValueOf(&fsData).Pointer()) - regs.Gs_base = uint64(reflect.ValueOf(&gsData).Pointer()) -} - -// CheckTestSegments checks that registers were twiddled per TwiddleSegments. -func CheckTestSegments(regs *arch.Registers) (err error) { - if regs.Rax != fsData { - err = addRegisterMismatch(err, "Rax", regs.Rax, fsData) - } - if regs.Rbx != gsData { - err = addRegisterMismatch(err, "Rbx", regs.Rcx, gsData) - } - return -} diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.s b/pkg/sentry/platform/kvm/testutil/testutil_amd64.s deleted file mode 100644 index 65e7c05ea..000000000 --- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.s +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build amd64 - -// test_util_amd64.s provides AMD64 test functions. - -#include "funcdata.h" -#include "textflag.h" - -TEXT ·Getpid(SB),NOSPLIT,$0 - NO_LOCAL_POINTERS - MOVQ $39, AX // getpid - SYSCALL - RET - -// func AddrOfGetpid() uintptr -TEXT ·AddrOfGetpid(SB), $0-8 - MOVQ $·Getpid(SB), AX - MOVQ AX, ret+0(FP) - RET - -TEXT ·touch(SB),NOSPLIT,$0 -start: - MOVQ 0(AX), BX // deref AX - MOVQ $39, AX // getpid - SYSCALL - JMP start - -// func AddrOfTouch() uintptr -TEXT ·AddrOfTouch(SB), $0-8 - MOVQ $·touch(SB), AX - MOVQ AX, ret+0(FP) - RET - -TEXT ·syscallLoop(SB),NOSPLIT,$0 -start: - SYSCALL - JMP start - -// func AddrOfSyscallLoop() uintptr -TEXT ·AddrOfSyscallLoop(SB), $0-8 - MOVQ $·syscallLoop(SB), AX - MOVQ AX, ret+0(FP) - RET - -TEXT ·spinLoop(SB),NOSPLIT,$0 -start: - JMP start - -// func AddrOfSpinLoop() uintptr -TEXT ·AddrOfSpinLoop(SB), $0-8 - MOVQ $·spinLoop(SB), AX - MOVQ AX, ret+0(FP) - RET - -TEXT ·FloatingPointWorks(SB),NOSPLIT,$0-8 - NO_LOCAL_POINTERS - MOVQ $1, AX - MOVQ AX, X0 - MOVQ $39, AX // getpid - SYSCALL - MOVQ X0, AX - CMPQ AX, $1 - SETEQ ret+0(FP) - RET - -#define TWIDDLE_REGS() \ - NOTQ R15; \ - NOTQ R14; \ - NOTQ R13; \ - NOTQ R12; \ - NOTQ BP; \ - NOTQ BX; \ - NOTQ R11; \ - NOTQ R10; \ - NOTQ R9; \ - NOTQ R8; \ - NOTQ AX; \ - NOTQ CX; \ - NOTQ DX; \ - NOTQ SI; \ - NOTQ DI; \ - NOTQ SP; - -TEXT ·twiddleRegsSyscall(SB),NOSPLIT,$0 - TWIDDLE_REGS() - SYSCALL - RET // never reached - -// func AddrOfTwiddleRegsSyscall() uintptr -TEXT ·AddrOfTwiddleRegsSyscall(SB), $0-8 - MOVQ $·twiddleRegsSyscall(SB), AX - MOVQ AX, ret+0(FP) - RET - -TEXT ·twiddleRegsFault(SB),NOSPLIT,$0 - TWIDDLE_REGS() - JMP AX // must fault - RET // never reached - -// func AddrOfTwiddleRegsFault() uintptr -TEXT ·AddrOfTwiddleRegsFault(SB), $0-8 - MOVQ $·twiddleRegsFault(SB), AX - MOVQ AX, ret+0(FP) - RET - -#define READ_FS() BYTE $0x64; BYTE $0x48; BYTE $0x8b; BYTE $0x00; -#define READ_GS() BYTE $0x65; BYTE $0x48; BYTE $0x8b; BYTE $0x00; - -TEXT ·twiddleSegments(SB),NOSPLIT,$0 - MOVQ $0x0, AX - READ_GS() - MOVQ AX, BX - MOVQ $0x0, AX - READ_FS() - SYSCALL - RET // never reached - -// func AddrOfTwiddleSegments() uintptr -TEXT ·AddrOfTwiddleSegments(SB), $0-8 - MOVQ $·twiddleSegments(SB), AX - MOVQ AX, ret+0(FP) - RET diff --git a/pkg/sentry/platform/kvm/testutil/testutil_arm64.go b/pkg/sentry/platform/kvm/testutil/testutil_arm64.go deleted file mode 100644 index 6d0ba8252..000000000 --- a/pkg/sentry/platform/kvm/testutil/testutil_arm64.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build arm64 -// +build arm64 - -package testutil - -import ( - "fmt" - "reflect" - - "gvisor.dev/gvisor/pkg/sentry/arch" -) - -// TLSWorks is a tls test. -// -// It returns true or false. -func TLSWorks() bool - -// SetTestTarget sets the rip appropriately. -func SetTestTarget(regs *arch.Registers, fn func()) { - regs.Pc = uint64(reflect.ValueOf(fn).Pointer()) -} - -// SetTouchTarget sets rax appropriately. -func SetTouchTarget(regs *arch.Registers, target *uintptr) { - if target != nil { - regs.Regs[8] = uint64(reflect.ValueOf(target).Pointer()) - } else { - regs.Regs[8] = 0 - } -} - -// RewindSyscall rewinds a syscall RIP. -func RewindSyscall(regs *arch.Registers) { - regs.Pc -= 4 -} - -// SetTestRegs initializes registers to known values. -func SetTestRegs(regs *arch.Registers) { - for i := 0; i <= 30; i++ { - regs.Regs[i] = uint64(i) + 1 - } -} - -// CheckTestRegs checks that registers were twiddled per TwiddleRegs. -func CheckTestRegs(regs *arch.Registers, full bool) (err error) { - for i := 0; i <= 30; i++ { - if need := ^uint64(i + 1); regs.Regs[i] != need { - err = addRegisterMismatch(err, fmt.Sprintf("R%d", i), regs.Regs[i], need) - } - } - // Check tls. - if need := ^uint64(11); regs.TPIDR_EL0 != need { - err = addRegisterMismatch(err, "tpdir_el0", regs.TPIDR_EL0, need) - } - return -} diff --git a/pkg/sentry/platform/kvm/testutil/testutil_arm64.s b/pkg/sentry/platform/kvm/testutil/testutil_arm64.s deleted file mode 100644 index 7348c29a5..000000000 --- a/pkg/sentry/platform/kvm/testutil/testutil_arm64.s +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build arm64 - -// test_util_arm64.s provides ARM64 test functions. - -#include "funcdata.h" -#include "textflag.h" - -#define SYS_GETPID 172 - -// This function simulates the getpid syscall. -TEXT ·Getpid(SB),NOSPLIT,$0 - NO_LOCAL_POINTERS - MOVD $SYS_GETPID, R8 - SVC - RET - -TEXT ·Touch(SB),NOSPLIT,$0 -start: - MOVD 0(R8), R1 - MOVD $SYS_GETPID, R8 // getpid - SVC - B start - -TEXT ·HaltLoop(SB),NOSPLIT,$0 -start: - HLT - B start - -// This function simulates a loop of syscall. -TEXT ·SyscallLoop(SB),NOSPLIT,$0 -start: - SVC - B start - -TEXT ·SpinLoop(SB),NOSPLIT,$0 -start: - B start - -TEXT ·TLSWorks(SB),NOSPLIT,$0-8 - NO_LOCAL_POINTERS - MOVD $0x6789, R5 - MSR R5, TPIDR_EL0 - MOVD $SYS_GETPID, R8 // getpid - SVC - MRS TPIDR_EL0, R6 - CMP R5, R6 - BNE isNaN - MOVD $1, R0 - MOVD R0, ret+0(FP) - RET -isNaN: - MOVD $0, ret+0(FP) - RET - -TEXT ·FloatingPointWorks(SB),NOSPLIT,$0-8 - NO_LOCAL_POINTERS - // gc will touch fpsimd, so we should test it. - // such as in <runtime.deductSweepCredit>. - FMOVD $(9.9), F0 - MOVD $SYS_GETPID, R8 // getpid - SVC - FMOVD $(9.9), F1 - FCMPD F0, F1 - BNE isNaN - MOVD $1, R0 - MOVD R0, ret+0(FP) - RET -isNaN: - MOVD $0, ret+0(FP) - RET - -// MVN: bitwise logical NOT -// This case simulates an application that modified R0-R30. -#define TWIDDLE_REGS() \ - MVN R0, R0; \ - MVN R1, R1; \ - MVN R2, R2; \ - MVN R3, R3; \ - MVN R4, R4; \ - MVN R5, R5; \ - MVN R6, R6; \ - MVN R7, R7; \ - MVN R8, R8; \ - MVN R9, R9; \ - MVN R10, R10; \ - MVN R11, R11; \ - MVN R12, R12; \ - MVN R13, R13; \ - MVN R14, R14; \ - MVN R15, R15; \ - MVN R16, R16; \ - MVN R17, R17; \ - MVN R18_PLATFORM, R18_PLATFORM; \ - MVN R19, R19; \ - MVN R20, R20; \ - MVN R21, R21; \ - MVN R22, R22; \ - MVN R23, R23; \ - MVN R24, R24; \ - MVN R25, R25; \ - MVN R26, R26; \ - MVN R27, R27; \ - MVN g, g; \ - MVN R29, R29; \ - MVN R30, R30; - -TEXT ·TwiddleRegsSyscall(SB),NOSPLIT,$0 - TWIDDLE_REGS() - MSR R10, TPIDR_EL0 - // Trapped in el0_svc. - SVC - RET // never reached - -TEXT ·TwiddleRegsFault(SB),NOSPLIT,$0 - TWIDDLE_REGS() - MSR R10, TPIDR_EL0 - // Trapped in el0_ia. - // Branch to Register branches unconditionally to an address in <Rn>. - JMP (R6) // <=> br x6, must fault - RET // never reached diff --git a/pkg/sentry/platform/kvm/virtual_map_test.go b/pkg/sentry/platform/kvm/virtual_map_test.go deleted file mode 100644 index 1f4a774f3..000000000 --- a/pkg/sentry/platform/kvm/virtual_map_test.go +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kvm - -import ( - "testing" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/hostarch" -) - -type checker struct { - ok bool - accessType hostarch.AccessType -} - -func (c *checker) Containing(addr uintptr) func(virtualRegion) { - c.ok = false // Reset for below calls. - return func(vr virtualRegion) { - if vr.virtual <= addr && addr < vr.virtual+vr.length { - c.ok = true - c.accessType = vr.accessType - } - } -} - -func TestParseMaps(t *testing.T) { - c := new(checker) - - // Simple test. - if err := applyVirtualRegions(c.Containing(0)); err != nil { - t.Fatalf("unexpected error: %v", err) - } - - // MMap a new page. - addr, _, errno := unix.RawSyscall6( - unix.SYS_MMAP, 0, hostarch.PageSize, - unix.PROT_READ|unix.PROT_WRITE, - unix.MAP_ANONYMOUS|unix.MAP_PRIVATE, 0, 0) - if errno != 0 { - t.Fatalf("unexpected map error: %v", errno) - } - - // Re-parse maps. - if err := applyVirtualRegions(c.Containing(addr)); err != nil { - unix.RawSyscall(unix.SYS_MUNMAP, addr, hostarch.PageSize, 0) - t.Fatalf("unexpected error: %v", err) - } - - // Assert that it now does contain the region. - if !c.ok { - unix.RawSyscall(unix.SYS_MUNMAP, addr, hostarch.PageSize, 0) - t.Fatalf("updated map does not contain 0x%08x, expected true", addr) - } - - // Map the region as PROT_NONE. - newAddr, _, errno := unix.RawSyscall6( - unix.SYS_MMAP, addr, hostarch.PageSize, - unix.PROT_NONE, - unix.MAP_ANONYMOUS|unix.MAP_FIXED|unix.MAP_PRIVATE, 0, 0) - if errno != 0 { - t.Fatalf("unexpected map error: %v", errno) - } - if newAddr != addr { - t.Fatalf("unable to remap address: got 0x%08x, wanted 0x%08x", newAddr, addr) - } - - // Re-parse maps. - if err := applyVirtualRegions(c.Containing(addr)); err != nil { - t.Fatalf("unexpected error: %v", err) - } - if !c.ok { - t.Fatalf("final map does not contain 0x%08x, expected true", addr) - } - if c.accessType.Read || c.accessType.Write || c.accessType.Execute { - t.Fatalf("final map has incorrect permissions for 0x%08x", addr) - } - - // Unmap the region. - unix.RawSyscall(unix.SYS_MUNMAP, addr, hostarch.PageSize, 0) -} diff --git a/pkg/sentry/platform/platform_state_autogen.go b/pkg/sentry/platform/platform_state_autogen.go new file mode 100644 index 000000000..7a4d04589 --- /dev/null +++ b/pkg/sentry/platform/platform_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package platform diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD deleted file mode 100644 index d101f2f53..000000000 --- a/pkg/sentry/platform/ptrace/BUILD +++ /dev/null @@ -1,41 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "ptrace", - srcs = [ - "filters.go", - "ptrace.go", - "ptrace_amd64.go", - "ptrace_arm64.go", - "ptrace_arm64_unsafe.go", - "ptrace_unsafe.go", - "stub_amd64.s", - "stub_arm64.s", - "stub_unsafe.go", - "subprocess.go", - "subprocess_amd64.go", - "subprocess_arm64.go", - "subprocess_linux.go", - "subprocess_linux_unsafe.go", - "subprocess_unsafe.go", - ], - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/hostarch", - "//pkg/log", - "//pkg/procid", - "//pkg/safecopy", - "//pkg/seccomp", - "//pkg/sentry/arch", - "//pkg/sentry/arch/fpu", - "//pkg/sentry/memmap", - "//pkg/sentry/platform", - "//pkg/sentry/platform/interrupt", - "//pkg/sync", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/platform/ptrace/ptrace_amd64_state_autogen.go b/pkg/sentry/platform/ptrace/ptrace_amd64_state_autogen.go new file mode 100644 index 000000000..9a5228154 --- /dev/null +++ b/pkg/sentry/platform/ptrace/ptrace_amd64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build amd64 +// +build amd64 + +package ptrace diff --git a/pkg/sentry/platform/ptrace/ptrace_arm64_state_autogen.go b/pkg/sentry/platform/ptrace/ptrace_arm64_state_autogen.go new file mode 100644 index 000000000..477523841 --- /dev/null +++ b/pkg/sentry/platform/ptrace/ptrace_arm64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 +// +build arm64 + +package ptrace diff --git a/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe_state_autogen.go b/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe_state_autogen.go new file mode 100644 index 000000000..477523841 --- /dev/null +++ b/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 +// +build arm64 + +package ptrace diff --git a/pkg/sentry/platform/ptrace/ptrace_linux_state_autogen.go b/pkg/sentry/platform/ptrace/ptrace_linux_state_autogen.go new file mode 100644 index 000000000..fcfee0226 --- /dev/null +++ b/pkg/sentry/platform/ptrace/ptrace_linux_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build linux +// +build linux + +package ptrace diff --git a/pkg/sentry/platform/ptrace/ptrace_linux_unsafe_state_autogen.go b/pkg/sentry/platform/ptrace/ptrace_linux_unsafe_state_autogen.go new file mode 100644 index 000000000..bf3fccfcb --- /dev/null +++ b/pkg/sentry/platform/ptrace/ptrace_linux_unsafe_state_autogen.go @@ -0,0 +1,7 @@ +// automatically generated by stateify. + +//go:build linux && (amd64 || arm64) +// +build linux +// +build amd64 arm64 + +package ptrace diff --git a/pkg/sentry/platform/ptrace/ptrace_state_autogen.go b/pkg/sentry/platform/ptrace/ptrace_state_autogen.go new file mode 100644 index 000000000..1bf0526f9 --- /dev/null +++ b/pkg/sentry/platform/ptrace/ptrace_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package ptrace diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe_state_autogen.go b/pkg/sentry/platform/ptrace/ptrace_unsafe_state_autogen.go new file mode 100644 index 000000000..66758e4ba --- /dev/null +++ b/pkg/sentry/platform/ptrace/ptrace_unsafe_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build go1.12 && !go1.18 +// +build go1.12,!go1.18 + +package ptrace diff --git a/pkg/sentry/sighandling/BUILD b/pkg/sentry/sighandling/BUILD deleted file mode 100644 index 1790d57c9..000000000 --- a/pkg/sentry/sighandling/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "sighandling", - srcs = [ - "sighandling.go", - "sighandling_unsafe.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/sighandling/sighandling_state_autogen.go b/pkg/sentry/sighandling/sighandling_state_autogen.go new file mode 100644 index 000000000..da9d96382 --- /dev/null +++ b/pkg/sentry/sighandling/sighandling_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package sighandling diff --git a/pkg/sentry/sighandling/sighandling_unsafe_state_autogen.go b/pkg/sentry/sighandling/sighandling_unsafe_state_autogen.go new file mode 100644 index 000000000..da9d96382 --- /dev/null +++ b/pkg/sentry/sighandling/sighandling_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package sighandling diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD deleted file mode 100644 index 7ee89a735..000000000 --- a/pkg/sentry/socket/BUILD +++ /dev/null @@ -1,27 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "socket", - srcs = ["socket.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/hostarch", - "//pkg/marshal", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/vfs", - "//pkg/syserr", - "//pkg/tcpip", - "//pkg/tcpip/header", - "//pkg/usermem", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD deleted file mode 100644 index b2fc84181..000000000 --- a/pkg/sentry/socket/control/BUILD +++ /dev/null @@ -1,44 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "control", - srcs = [ - "control.go", - "control_vfs2.go", - ], - imports = [ - "gvisor.dev/gvisor/pkg/sentry/fs", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/bits", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/sentry/fs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/socket", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/vfs", - ], -) - -go_test( - name = "control_test", - size = "small", - srcs = ["control_test.go"], - library = ":control", - deps = [ - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/hostarch", - "//pkg/sentry/socket", - "@com_github_google_go_cmp//cmp:go_default_library", - ], -) diff --git a/pkg/sentry/socket/control/control_state_autogen.go b/pkg/sentry/socket/control/control_state_autogen.go new file mode 100644 index 000000000..412025601 --- /dev/null +++ b/pkg/sentry/socket/control/control_state_autogen.go @@ -0,0 +1,60 @@ +// automatically generated by stateify. + +package control + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (fs *RightsFiles) StateTypeName() string { + return "pkg/sentry/socket/control.RightsFiles" +} + +func (fs *RightsFiles) StateFields() []string { + return nil +} + +func (c *scmCredentials) StateTypeName() string { + return "pkg/sentry/socket/control.scmCredentials" +} + +func (c *scmCredentials) StateFields() []string { + return []string{ + "t", + "kuid", + "kgid", + } +} + +func (c *scmCredentials) beforeSave() {} + +// +checklocksignore +func (c *scmCredentials) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.t) + stateSinkObject.Save(1, &c.kuid) + stateSinkObject.Save(2, &c.kgid) +} + +func (c *scmCredentials) afterLoad() {} + +// +checklocksignore +func (c *scmCredentials) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.t) + stateSourceObject.Load(1, &c.kuid) + stateSourceObject.Load(2, &c.kgid) +} + +func (fs *RightsFilesVFS2) StateTypeName() string { + return "pkg/sentry/socket/control.RightsFilesVFS2" +} + +func (fs *RightsFilesVFS2) StateFields() []string { + return nil +} + +func init() { + state.Register((*RightsFiles)(nil)) + state.Register((*scmCredentials)(nil)) + state.Register((*RightsFilesVFS2)(nil)) +} diff --git a/pkg/sentry/socket/control/control_test.go b/pkg/sentry/socket/control/control_test.go deleted file mode 100644 index 7e28a0cef..000000000 --- a/pkg/sentry/socket/control/control_test.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package control provides internal representations of socket control -// messages. -package control - -import ( - "testing" - - "github.com/google/go-cmp/cmp" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/sentry/socket" -) - -func TestParse(t *testing.T) { - // Craft the control message to parse. - length := linux.SizeOfControlMessageHeader + linux.SizeOfTimeval - hdr := linux.ControlMessageHeader{ - Length: uint64(length), - Level: linux.SOL_SOCKET, - Type: linux.SO_TIMESTAMP, - } - buf := make([]byte, 0, length) - buf = binary.Marshal(buf, hostarch.ByteOrder, &hdr) - ts := linux.Timeval{ - Sec: 2401, - Usec: 343, - } - buf = binary.Marshal(buf, hostarch.ByteOrder, &ts) - - cmsg, err := Parse(nil, nil, buf, 8 /* width */) - if err != nil { - t.Fatalf("Parse(_, _, %+v, _): %v", cmsg, err) - } - - want := socket.ControlMessages{ - IP: socket.IPControlMessages{ - HasTimestamp: true, - Timestamp: ts.ToNsecCapped(), - }, - } - if diff := cmp.Diff(want, cmsg); diff != "" { - t.Errorf("unexpected message parsed, (-want, +got):\n%s", diff) - } -} diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD deleted file mode 100644 index 3950caa0f..000000000 --- a/pkg/sentry/socket/hostinet/BUILD +++ /dev/null @@ -1,48 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "hostinet", - srcs = [ - "device.go", - "hostinet.go", - "save_restore.go", - "socket.go", - "socket_unsafe.go", - "socket_vfs2.go", - "sockopt_impl.go", - "stack.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fdnotifier", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fsimpl/sockfs", - "//pkg/sentry/hostfd", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/control", - "//pkg/sentry/vfs", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/tcpip/stack", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/socket/hostinet/hostinet_impl_state_autogen.go b/pkg/sentry/socket/hostinet/hostinet_impl_state_autogen.go new file mode 100644 index 000000000..e52b4a6d6 --- /dev/null +++ b/pkg/sentry/socket/hostinet/hostinet_impl_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build go1.1 +// +build go1.1 + +package hostinet diff --git a/pkg/sentry/socket/hostinet/hostinet_state_autogen.go b/pkg/sentry/socket/hostinet/hostinet_state_autogen.go new file mode 100644 index 000000000..519c65339 --- /dev/null +++ b/pkg/sentry/socket/hostinet/hostinet_state_autogen.go @@ -0,0 +1,89 @@ +// automatically generated by stateify. + +package hostinet + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *socketOpsCommon) StateTypeName() string { + return "pkg/sentry/socket/hostinet.socketOpsCommon" +} + +func (s *socketOpsCommon) StateFields() []string { + return []string{ + "SendReceiveTimeout", + "family", + "stype", + "protocol", + "queue", + "fd", + } +} + +func (s *socketOpsCommon) beforeSave() {} + +// +checklocksignore +func (s *socketOpsCommon) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.SendReceiveTimeout) + stateSinkObject.Save(1, &s.family) + stateSinkObject.Save(2, &s.stype) + stateSinkObject.Save(3, &s.protocol) + stateSinkObject.Save(4, &s.queue) + stateSinkObject.Save(5, &s.fd) +} + +func (s *socketOpsCommon) afterLoad() {} + +// +checklocksignore +func (s *socketOpsCommon) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.SendReceiveTimeout) + stateSourceObject.Load(1, &s.family) + stateSourceObject.Load(2, &s.stype) + stateSourceObject.Load(3, &s.protocol) + stateSourceObject.Load(4, &s.queue) + stateSourceObject.Load(5, &s.fd) +} + +func (s *socketVFS2) StateTypeName() string { + return "pkg/sentry/socket/hostinet.socketVFS2" +} + +func (s *socketVFS2) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "LockFD", + "DentryMetadataFileDescriptionImpl", + "socketOpsCommon", + } +} + +func (s *socketVFS2) beforeSave() {} + +// +checklocksignore +func (s *socketVFS2) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.vfsfd) + stateSinkObject.Save(1, &s.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &s.LockFD) + stateSinkObject.Save(3, &s.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(4, &s.socketOpsCommon) +} + +func (s *socketVFS2) afterLoad() {} + +// +checklocksignore +func (s *socketVFS2) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.vfsfd) + stateSourceObject.Load(1, &s.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &s.LockFD) + stateSourceObject.Load(3, &s.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(4, &s.socketOpsCommon) +} + +func init() { + state.Register((*socketOpsCommon)(nil)) + state.Register((*socketVFS2)(nil)) +} diff --git a/pkg/sentry/socket/hostinet/hostinet_unsafe_state_autogen.go b/pkg/sentry/socket/hostinet/hostinet_unsafe_state_autogen.go new file mode 100644 index 000000000..b0a59ba93 --- /dev/null +++ b/pkg/sentry/socket/hostinet/hostinet_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package hostinet diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD deleted file mode 100644 index 608474fa1..000000000 --- a/pkg/sentry/socket/netfilter/BUILD +++ /dev/null @@ -1,34 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "netfilter", - srcs = [ - "extensions.go", - "ipv4.go", - "ipv6.go", - "netfilter.go", - "owner_matcher.go", - "targets.go", - "tcp_matcher.go", - "udp_matcher.go", - ], - marshal = True, - # This target depends on netstack and should only be used by epsocket, - # which is allowed to depend on netstack. - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/bits", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/syserr", - "//pkg/tcpip", - "//pkg/tcpip/header", - "//pkg/tcpip/stack", - ], -) diff --git a/pkg/sentry/socket/netfilter/netfilter_abi_autogen_unsafe.go b/pkg/sentry/socket/netfilter/netfilter_abi_autogen_unsafe.go new file mode 100644 index 000000000..786b62935 --- /dev/null +++ b/pkg/sentry/socket/netfilter/netfilter_abi_autogen_unsafe.go @@ -0,0 +1,150 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package netfilter + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal" + "io" + "reflect" + "runtime" + "unsafe" +) + +// Marshallable types used by this file. +var _ marshal.Marshallable = (*linux.NFNATRange)(nil) +var _ marshal.Marshallable = (*linux.XTEntryTarget)(nil) +var _ marshal.Marshallable = (*nfNATTarget)(nil) + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (n *nfNATTarget) SizeBytes() int { + return 0 + + (*linux.XTEntryTarget)(nil).SizeBytes() + + (*linux.NFNATRange)(nil).SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (n *nfNATTarget) MarshalBytes(dst []byte) { + n.Target.MarshalBytes(dst[:n.Target.SizeBytes()]) + dst = dst[n.Target.SizeBytes():] + n.Range.MarshalBytes(dst[:n.Range.SizeBytes()]) + dst = dst[n.Range.SizeBytes():] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (n *nfNATTarget) UnmarshalBytes(src []byte) { + n.Target.UnmarshalBytes(src[:n.Target.SizeBytes()]) + src = src[n.Target.SizeBytes():] + n.Range.UnmarshalBytes(src[:n.Range.SizeBytes()]) + src = src[n.Range.SizeBytes():] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (n *nfNATTarget) Packed() bool { + return n.Range.Packed() && n.Target.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (n *nfNATTarget) MarshalUnsafe(dst []byte) { + if n.Range.Packed() && n.Target.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(n), uintptr(n.SizeBytes())) + } else { + // Type nfNATTarget doesn't have a packed layout in memory, fallback to MarshalBytes. + n.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (n *nfNATTarget) UnmarshalUnsafe(src []byte) { + if n.Range.Packed() && n.Target.Packed() { + gohacks.Memmove(unsafe.Pointer(n), unsafe.Pointer(&src[0]), uintptr(n.SizeBytes())) + } else { + // Type nfNATTarget doesn't have a packed layout in memory, fallback to UnmarshalBytes. + n.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (n *nfNATTarget) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !n.Range.Packed() && n.Target.Packed() { + // Type nfNATTarget doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(n.SizeBytes()) // escapes: okay. + n.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(n))) + hdr.Len = n.SizeBytes() + hdr.Cap = n.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that n + // must live until the use above. + runtime.KeepAlive(n) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (n *nfNATTarget) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return n.CopyOutN(cc, addr, n.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (n *nfNATTarget) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !n.Range.Packed() && n.Target.Packed() { + // Type nfNATTarget doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(n.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + n.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(n))) + hdr.Len = n.SizeBytes() + hdr.Cap = n.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that n + // must live until the use above. + runtime.KeepAlive(n) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (n *nfNATTarget) WriteTo(writer io.Writer) (int64, error) { + if !n.Range.Packed() && n.Target.Packed() { + // Type nfNATTarget doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, n.SizeBytes()) + n.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(n))) + hdr.Len = n.SizeBytes() + hdr.Cap = n.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that n + // must live until the use above. + runtime.KeepAlive(n) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/sentry/socket/netfilter/netfilter_state_autogen.go b/pkg/sentry/socket/netfilter/netfilter_state_autogen.go new file mode 100644 index 000000000..6e95d89a4 --- /dev/null +++ b/pkg/sentry/socket/netfilter/netfilter_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package netfilter diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD deleted file mode 100644 index ed85404da..000000000 --- a/pkg/sentry/socket/netlink/BUILD +++ /dev/null @@ -1,58 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "netlink", - srcs = [ - "message.go", - "provider.go", - "provider_vfs2.go", - "socket.go", - "socket_vfs2.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/abi/linux/errno", - "//pkg/bits", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fsimpl/sockfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/netlink/port", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "netlink_test", - size = "small", - srcs = [ - "message_test.go", - ], - deps = [ - ":netlink", - "//pkg/abi/linux", - "//pkg/marshal", - "//pkg/marshal/primitive", - ], -) diff --git a/pkg/sentry/socket/netlink/message_test.go b/pkg/sentry/socket/netlink/message_test.go deleted file mode 100644 index 968968469..000000000 --- a/pkg/sentry/socket/netlink/message_test.go +++ /dev/null @@ -1,330 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package message_test - -import ( - "bytes" - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/marshal" - "gvisor.dev/gvisor/pkg/marshal/primitive" - "gvisor.dev/gvisor/pkg/sentry/socket/netlink" -) - -type dummyNetlinkMsg struct { - marshal.StubMarshallable - Foo uint16 -} - -func (*dummyNetlinkMsg) SizeBytes() int { - return 2 -} - -func (m *dummyNetlinkMsg) MarshalUnsafe(dst []byte) { - p := primitive.Uint16(m.Foo) - p.MarshalUnsafe(dst) -} - -func (m *dummyNetlinkMsg) UnmarshalUnsafe(src []byte) { - var p primitive.Uint16 - p.UnmarshalUnsafe(src) - m.Foo = uint16(p) -} - -func TestParseMessage(t *testing.T) { - tests := []struct { - desc string - input []byte - - header linux.NetlinkMessageHeader - dataMsg *dummyNetlinkMsg - restLen int - ok bool - }{ - { - desc: "valid", - input: []byte{ - 0x14, 0x00, 0x00, 0x00, // Length - 0x01, 0x00, // Type - 0x02, 0x00, // Flags - 0x03, 0x00, 0x00, 0x00, // Seq - 0x04, 0x00, 0x00, 0x00, // PortID - 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding - }, - header: linux.NetlinkMessageHeader{ - Length: 20, - Type: 1, - Flags: 2, - Seq: 3, - PortID: 4, - }, - dataMsg: &dummyNetlinkMsg{ - Foo: 0x3130, - }, - restLen: 0, - ok: true, - }, - { - desc: "valid with next message", - input: []byte{ - 0x14, 0x00, 0x00, 0x00, // Length - 0x01, 0x00, // Type - 0x02, 0x00, // Flags - 0x03, 0x00, 0x00, 0x00, // Seq - 0x04, 0x00, 0x00, 0x00, // PortID - 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding - 0xFF, // Next message (rest) - }, - header: linux.NetlinkMessageHeader{ - Length: 20, - Type: 1, - Flags: 2, - Seq: 3, - PortID: 4, - }, - dataMsg: &dummyNetlinkMsg{ - Foo: 0x3130, - }, - restLen: 1, - ok: true, - }, - { - desc: "valid for last message without padding", - input: []byte{ - 0x12, 0x00, 0x00, 0x00, // Length - 0x01, 0x00, // Type - 0x02, 0x00, // Flags - 0x03, 0x00, 0x00, 0x00, // Seq - 0x04, 0x00, 0x00, 0x00, // PortID - 0x30, 0x31, // Data message - }, - header: linux.NetlinkMessageHeader{ - Length: 18, - Type: 1, - Flags: 2, - Seq: 3, - PortID: 4, - }, - dataMsg: &dummyNetlinkMsg{ - Foo: 0x3130, - }, - restLen: 0, - ok: true, - }, - { - desc: "valid for last message not to be aligned", - input: []byte{ - 0x13, 0x00, 0x00, 0x00, // Length - 0x01, 0x00, // Type - 0x02, 0x00, // Flags - 0x03, 0x00, 0x00, 0x00, // Seq - 0x04, 0x00, 0x00, 0x00, // PortID - 0x30, 0x31, // Data message - 0x00, // Excessive 1 byte permitted at end - }, - header: linux.NetlinkMessageHeader{ - Length: 19, - Type: 1, - Flags: 2, - Seq: 3, - PortID: 4, - }, - dataMsg: &dummyNetlinkMsg{ - Foo: 0x3130, - }, - restLen: 0, - ok: true, - }, - { - desc: "header.Length too short", - input: []byte{ - 0x04, 0x00, 0x00, 0x00, // Length - 0x01, 0x00, // Type - 0x02, 0x00, // Flags - 0x03, 0x00, 0x00, 0x00, // Seq - 0x04, 0x00, 0x00, 0x00, // PortID - 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding - }, - ok: false, - }, - { - desc: "header.Length too long", - input: []byte{ - 0xFF, 0xFF, 0x00, 0x00, // Length - 0x01, 0x00, // Type - 0x02, 0x00, // Flags - 0x03, 0x00, 0x00, 0x00, // Seq - 0x04, 0x00, 0x00, 0x00, // PortID - 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding - }, - ok: false, - }, - { - desc: "header incomplete", - input: []byte{ - 0x04, 0x00, 0x00, 0x00, // Length - }, - ok: false, - }, - { - desc: "empty message", - input: []byte{}, - ok: false, - }, - } - for _, test := range tests { - msg, rest, ok := netlink.ParseMessage(test.input) - if ok != test.ok { - t.Errorf("%v: got ok = %v, want = %v", test.desc, ok, test.ok) - continue - } - if !test.ok { - continue - } - if !reflect.DeepEqual(msg.Header(), test.header) { - t.Errorf("%v: got hdr = %+v, want = %+v", test.desc, msg.Header(), test.header) - } - - dataMsg := &dummyNetlinkMsg{} - _, dataOk := msg.GetData(dataMsg) - if !dataOk { - t.Errorf("%v: GetData.ok = %v, want = true", test.desc, dataOk) - } else if !reflect.DeepEqual(dataMsg, test.dataMsg) { - t.Errorf("%v: GetData.msg = %+v, want = %+v", test.desc, dataMsg, test.dataMsg) - } - - if got, want := rest, test.input[len(test.input)-test.restLen:]; !bytes.Equal(got, want) { - t.Errorf("%v: got rest = %v, want = %v", test.desc, got, want) - } - } -} - -func TestAttrView(t *testing.T) { - tests := []struct { - desc string - input []byte - - // Outputs for ParseFirst. - hdr linux.NetlinkAttrHeader - value []byte - restLen int - ok bool - - // Outputs for Empty. - isEmpty bool - }{ - { - desc: "valid", - input: []byte{ - 0x06, 0x00, // Length - 0x01, 0x00, // Type - 0x30, 0x31, 0x00, 0x00, // Data with 2 bytes padding - }, - hdr: linux.NetlinkAttrHeader{ - Length: 6, - Type: 1, - }, - value: []byte{0x30, 0x31}, - restLen: 0, - ok: true, - isEmpty: false, - }, - { - desc: "at alignment", - input: []byte{ - 0x08, 0x00, // Length - 0x01, 0x00, // Type - 0x30, 0x31, 0x32, 0x33, // Data - }, - hdr: linux.NetlinkAttrHeader{ - Length: 8, - Type: 1, - }, - value: []byte{0x30, 0x31, 0x32, 0x33}, - restLen: 0, - ok: true, - isEmpty: false, - }, - { - desc: "at alignment with rest data", - input: []byte{ - 0x08, 0x00, // Length - 0x01, 0x00, // Type - 0x30, 0x31, 0x32, 0x33, // Data - 0xFF, 0xFE, // Rest data - }, - hdr: linux.NetlinkAttrHeader{ - Length: 8, - Type: 1, - }, - value: []byte{0x30, 0x31, 0x32, 0x33}, - restLen: 2, - ok: true, - isEmpty: false, - }, - { - desc: "hdr.Length too long", - input: []byte{ - 0xFF, 0x00, // Length - 0x01, 0x00, // Type - 0x30, 0x31, 0x32, 0x33, // Data - }, - ok: false, - isEmpty: false, - }, - { - desc: "hdr.Length too short", - input: []byte{ - 0x01, 0x00, // Length - 0x01, 0x00, // Type - 0x30, 0x31, 0x32, 0x33, // Data - }, - ok: false, - isEmpty: false, - }, - { - desc: "empty", - input: []byte{}, - ok: false, - isEmpty: true, - }, - } - for _, test := range tests { - attrs := netlink.AttrsView(test.input) - - // Test ParseFirst(). - hdr, value, rest, ok := attrs.ParseFirst() - if ok != test.ok { - t.Errorf("%v: got ok = %v, want = %v", test.desc, ok, test.ok) - } else if test.ok { - if !reflect.DeepEqual(hdr, test.hdr) { - t.Errorf("%v: got hdr = %+v, want = %+v", test.desc, hdr, test.hdr) - } - if !bytes.Equal(value, test.value) { - t.Errorf("%v: got value = %v, want = %v", test.desc, value, test.value) - } - if wantRest := test.input[len(test.input)-test.restLen:]; !bytes.Equal(rest, wantRest) { - t.Errorf("%v: got rest = %v, want = %v", test.desc, rest, wantRest) - } - } - - // Test Empty(). - if got, want := attrs.Empty(), test.isEmpty; got != want { - t.Errorf("%v: got empty = %v, want = %v", test.desc, got, want) - } - } -} diff --git a/pkg/sentry/socket/netlink/netlink_state_autogen.go b/pkg/sentry/socket/netlink/netlink_state_autogen.go new file mode 100644 index 000000000..29c549880 --- /dev/null +++ b/pkg/sentry/socket/netlink/netlink_state_autogen.go @@ -0,0 +1,149 @@ +// automatically generated by stateify. + +package netlink + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *Socket) StateTypeName() string { + return "pkg/sentry/socket/netlink.Socket" +} + +func (s *Socket) StateFields() []string { + return []string{ + "socketOpsCommon", + } +} + +func (s *Socket) beforeSave() {} + +// +checklocksignore +func (s *Socket) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.socketOpsCommon) +} + +func (s *Socket) afterLoad() {} + +// +checklocksignore +func (s *Socket) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.socketOpsCommon) +} + +func (s *socketOpsCommon) StateTypeName() string { + return "pkg/sentry/socket/netlink.socketOpsCommon" +} + +func (s *socketOpsCommon) StateFields() []string { + return []string{ + "SendReceiveTimeout", + "ports", + "protocol", + "skType", + "ep", + "connection", + "bound", + "portID", + "sendBufferSize", + "filter", + } +} + +func (s *socketOpsCommon) beforeSave() {} + +// +checklocksignore +func (s *socketOpsCommon) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.SendReceiveTimeout) + stateSinkObject.Save(1, &s.ports) + stateSinkObject.Save(2, &s.protocol) + stateSinkObject.Save(3, &s.skType) + stateSinkObject.Save(4, &s.ep) + stateSinkObject.Save(5, &s.connection) + stateSinkObject.Save(6, &s.bound) + stateSinkObject.Save(7, &s.portID) + stateSinkObject.Save(8, &s.sendBufferSize) + stateSinkObject.Save(9, &s.filter) +} + +func (s *socketOpsCommon) afterLoad() {} + +// +checklocksignore +func (s *socketOpsCommon) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.SendReceiveTimeout) + stateSourceObject.Load(1, &s.ports) + stateSourceObject.Load(2, &s.protocol) + stateSourceObject.Load(3, &s.skType) + stateSourceObject.Load(4, &s.ep) + stateSourceObject.Load(5, &s.connection) + stateSourceObject.Load(6, &s.bound) + stateSourceObject.Load(7, &s.portID) + stateSourceObject.Load(8, &s.sendBufferSize) + stateSourceObject.Load(9, &s.filter) +} + +func (k *kernelSCM) StateTypeName() string { + return "pkg/sentry/socket/netlink.kernelSCM" +} + +func (k *kernelSCM) StateFields() []string { + return []string{} +} + +func (k *kernelSCM) beforeSave() {} + +// +checklocksignore +func (k *kernelSCM) StateSave(stateSinkObject state.Sink) { + k.beforeSave() +} + +func (k *kernelSCM) afterLoad() {} + +// +checklocksignore +func (k *kernelSCM) StateLoad(stateSourceObject state.Source) { +} + +func (s *SocketVFS2) StateTypeName() string { + return "pkg/sentry/socket/netlink.SocketVFS2" +} + +func (s *SocketVFS2) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "LockFD", + "socketOpsCommon", + } +} + +func (s *SocketVFS2) beforeSave() {} + +// +checklocksignore +func (s *SocketVFS2) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.vfsfd) + stateSinkObject.Save(1, &s.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &s.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &s.LockFD) + stateSinkObject.Save(4, &s.socketOpsCommon) +} + +func (s *SocketVFS2) afterLoad() {} + +// +checklocksignore +func (s *SocketVFS2) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.vfsfd) + stateSourceObject.Load(1, &s.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &s.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &s.LockFD) + stateSourceObject.Load(4, &s.socketOpsCommon) +} + +func init() { + state.Register((*Socket)(nil)) + state.Register((*socketOpsCommon)(nil)) + state.Register((*kernelSCM)(nil)) + state.Register((*SocketVFS2)(nil)) +} diff --git a/pkg/sentry/socket/netlink/port/BUILD b/pkg/sentry/socket/netlink/port/BUILD deleted file mode 100644 index 3a22923d8..000000000 --- a/pkg/sentry/socket/netlink/port/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "port", - srcs = ["port.go"], - visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/sync"], -) - -go_test( - name = "port_test", - srcs = ["port_test.go"], - library = ":port", -) diff --git a/pkg/sentry/socket/netlink/port/port_state_autogen.go b/pkg/sentry/socket/netlink/port/port_state_autogen.go new file mode 100644 index 000000000..b22471899 --- /dev/null +++ b/pkg/sentry/socket/netlink/port/port_state_autogen.go @@ -0,0 +1,36 @@ +// automatically generated by stateify. + +package port + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (m *Manager) StateTypeName() string { + return "pkg/sentry/socket/netlink/port.Manager" +} + +func (m *Manager) StateFields() []string { + return []string{ + "ports", + } +} + +func (m *Manager) beforeSave() {} + +// +checklocksignore +func (m *Manager) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.ports) +} + +func (m *Manager) afterLoad() {} + +// +checklocksignore +func (m *Manager) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.ports) +} + +func init() { + state.Register((*Manager)(nil)) +} diff --git a/pkg/sentry/socket/netlink/port/port_test.go b/pkg/sentry/socket/netlink/port/port_test.go deleted file mode 100644 index 516f6cd6c..000000000 --- a/pkg/sentry/socket/netlink/port/port_test.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package port - -import ( - "testing" -) - -func TestAllocateHint(t *testing.T) { - m := New() - - // We can get the hint port. - p, ok := m.Allocate(0, 1) - if !ok { - t.Errorf("m.Allocate got !ok want ok") - } - if p != 1 { - t.Errorf("m.Allocate(0, 1) got %d want 1", p) - } - - // Hint is taken. - p, ok = m.Allocate(0, 1) - if !ok { - t.Errorf("m.Allocate got !ok want ok") - } - if p == 1 { - t.Errorf("m.Allocate(0, 1) got 1 want anything else") - } - - // Hint is available for a different protocol. - p, ok = m.Allocate(1, 1) - if !ok { - t.Errorf("m.Allocate got !ok want ok") - } - if p != 1 { - t.Errorf("m.Allocate(1, 1) got %d want 1", p) - } - - m.Release(0, 1) - - // Hint is available again after release. - p, ok = m.Allocate(0, 1) - if !ok { - t.Errorf("m.Allocate got !ok want ok") - } - if p != 1 { - t.Errorf("m.Allocate(0, 1) got %d want 1", p) - } -} - -func TestAllocateExhausted(t *testing.T) { - m := New() - - // Fill all ports (0 is already reserved). - for i := int32(1); i < maxPorts; i++ { - p, ok := m.Allocate(0, i) - if !ok { - t.Fatalf("m.Allocate got !ok want ok") - } - if p != i { - t.Fatalf("m.Allocate(0, %d) got %d want %d", i, p, i) - } - } - - // Now no more can be allocated. - p, ok := m.Allocate(0, 1) - if ok { - t.Errorf("m.Allocate got %d, ok want !ok", p) - } -} diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD deleted file mode 100644 index c6c04b4e3..000000000 --- a/pkg/sentry/socket/netlink/route/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "route", - srcs = [ - "protocol.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/marshal/primitive", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/socket/netlink", - "//pkg/syserr", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/socket/netlink/route/route_state_autogen.go b/pkg/sentry/socket/netlink/route/route_state_autogen.go new file mode 100644 index 000000000..c4a94ab49 --- /dev/null +++ b/pkg/sentry/socket/netlink/route/route_state_autogen.go @@ -0,0 +1,32 @@ +// automatically generated by stateify. + +package route + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (p *Protocol) StateTypeName() string { + return "pkg/sentry/socket/netlink/route.Protocol" +} + +func (p *Protocol) StateFields() []string { + return []string{} +} + +func (p *Protocol) beforeSave() {} + +// +checklocksignore +func (p *Protocol) StateSave(stateSinkObject state.Sink) { + p.beforeSave() +} + +func (p *Protocol) afterLoad() {} + +// +checklocksignore +func (p *Protocol) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*Protocol)(nil)) +} diff --git a/pkg/sentry/socket/netlink/uevent/BUILD b/pkg/sentry/socket/netlink/uevent/BUILD deleted file mode 100644 index b6434923c..000000000 --- a/pkg/sentry/socket/netlink/uevent/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "uevent", - srcs = ["protocol.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/kernel", - "//pkg/sentry/socket/netlink", - "//pkg/syserr", - ], -) diff --git a/pkg/sentry/socket/netlink/uevent/uevent_state_autogen.go b/pkg/sentry/socket/netlink/uevent/uevent_state_autogen.go new file mode 100644 index 000000000..f45d63d9a --- /dev/null +++ b/pkg/sentry/socket/netlink/uevent/uevent_state_autogen.go @@ -0,0 +1,32 @@ +// automatically generated by stateify. + +package uevent + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (p *Protocol) StateTypeName() string { + return "pkg/sentry/socket/netlink/uevent.Protocol" +} + +func (p *Protocol) StateFields() []string { + return []string{} +} + +func (p *Protocol) beforeSave() {} + +// +checklocksignore +func (p *Protocol) StateSave(stateSinkObject state.Sink) { + p.beforeSave() +} + +func (p *Protocol) afterLoad() {} + +// +checklocksignore +func (p *Protocol) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*Protocol)(nil)) +} diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD deleted file mode 100644 index e828982eb..000000000 --- a/pkg/sentry/socket/netstack/BUILD +++ /dev/null @@ -1,58 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "netstack", - srcs = [ - "device.go", - "netstack.go", - "netstack_vfs2.go", - "provider.go", - "provider_vfs2.go", - "save_restore.go", - "stack.go", - "tun.go", - ], - visibility = [ - "//pkg/sentry:internal", - ], - deps = [ - "//pkg/abi/linux", - "//pkg/abi/linux/errno", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/metric", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fsimpl/sockfs", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/netfilter", - "//pkg/sentry/unimpl", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/tcpip/header", - "//pkg/tcpip/link/tun", - "//pkg/tcpip/network/ipv4", - "//pkg/tcpip/network/ipv6", - "//pkg/tcpip/stack", - "//pkg/tcpip/transport/tcp", - "//pkg/tcpip/transport/udp", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/socket/netstack/netstack_state_autogen.go b/pkg/sentry/socket/netstack/netstack_state_autogen.go new file mode 100644 index 000000000..335437f04 --- /dev/null +++ b/pkg/sentry/socket/netstack/netstack_state_autogen.go @@ -0,0 +1,148 @@ +// automatically generated by stateify. + +package netstack + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *SocketOperations) StateTypeName() string { + return "pkg/sentry/socket/netstack.SocketOperations" +} + +func (s *SocketOperations) StateFields() []string { + return []string{ + "socketOpsCommon", + } +} + +func (s *SocketOperations) beforeSave() {} + +// +checklocksignore +func (s *SocketOperations) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.socketOpsCommon) +} + +func (s *SocketOperations) afterLoad() {} + +// +checklocksignore +func (s *SocketOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.socketOpsCommon) +} + +func (s *socketOpsCommon) StateTypeName() string { + return "pkg/sentry/socket/netstack.socketOpsCommon" +} + +func (s *socketOpsCommon) StateFields() []string { + return []string{ + "SendReceiveTimeout", + "Queue", + "family", + "Endpoint", + "skType", + "protocol", + "sockOptTimestamp", + "timestampValid", + "timestampNS", + "sockOptInq", + } +} + +func (s *socketOpsCommon) beforeSave() {} + +// +checklocksignore +func (s *socketOpsCommon) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.SendReceiveTimeout) + stateSinkObject.Save(1, &s.Queue) + stateSinkObject.Save(2, &s.family) + stateSinkObject.Save(3, &s.Endpoint) + stateSinkObject.Save(4, &s.skType) + stateSinkObject.Save(5, &s.protocol) + stateSinkObject.Save(6, &s.sockOptTimestamp) + stateSinkObject.Save(7, &s.timestampValid) + stateSinkObject.Save(8, &s.timestampNS) + stateSinkObject.Save(9, &s.sockOptInq) +} + +func (s *socketOpsCommon) afterLoad() {} + +// +checklocksignore +func (s *socketOpsCommon) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.SendReceiveTimeout) + stateSourceObject.Load(1, &s.Queue) + stateSourceObject.Load(2, &s.family) + stateSourceObject.Load(3, &s.Endpoint) + stateSourceObject.Load(4, &s.skType) + stateSourceObject.Load(5, &s.protocol) + stateSourceObject.Load(6, &s.sockOptTimestamp) + stateSourceObject.Load(7, &s.timestampValid) + stateSourceObject.Load(8, &s.timestampNS) + stateSourceObject.Load(9, &s.sockOptInq) +} + +func (s *SocketVFS2) StateTypeName() string { + return "pkg/sentry/socket/netstack.SocketVFS2" +} + +func (s *SocketVFS2) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "LockFD", + "socketOpsCommon", + } +} + +func (s *SocketVFS2) beforeSave() {} + +// +checklocksignore +func (s *SocketVFS2) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.vfsfd) + stateSinkObject.Save(1, &s.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &s.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &s.LockFD) + stateSinkObject.Save(4, &s.socketOpsCommon) +} + +func (s *SocketVFS2) afterLoad() {} + +// +checklocksignore +func (s *SocketVFS2) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.vfsfd) + stateSourceObject.Load(1, &s.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &s.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &s.LockFD) + stateSourceObject.Load(4, &s.socketOpsCommon) +} + +func (s *Stack) StateTypeName() string { + return "pkg/sentry/socket/netstack.Stack" +} + +func (s *Stack) StateFields() []string { + return []string{} +} + +func (s *Stack) beforeSave() {} + +// +checklocksignore +func (s *Stack) StateSave(stateSinkObject state.Sink) { + s.beforeSave() +} + +// +checklocksignore +func (s *Stack) StateLoad(stateSourceObject state.Source) { + stateSourceObject.AfterLoad(s.afterLoad) +} + +func init() { + state.Register((*SocketOperations)(nil)) + state.Register((*socketOpsCommon)(nil)) + state.Register((*SocketVFS2)(nil)) + state.Register((*Stack)(nil)) +} diff --git a/pkg/sentry/socket/socket_state_autogen.go b/pkg/sentry/socket/socket_state_autogen.go new file mode 100644 index 000000000..d050093f6 --- /dev/null +++ b/pkg/sentry/socket/socket_state_autogen.go @@ -0,0 +1,98 @@ +// automatically generated by stateify. + +package socket + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (i *IPControlMessages) StateTypeName() string { + return "pkg/sentry/socket.IPControlMessages" +} + +func (i *IPControlMessages) StateFields() []string { + return []string{ + "HasTimestamp", + "Timestamp", + "HasInq", + "Inq", + "HasTOS", + "TOS", + "HasTClass", + "TClass", + "HasIPPacketInfo", + "PacketInfo", + "OriginalDstAddress", + "SockErr", + } +} + +func (i *IPControlMessages) beforeSave() {} + +// +checklocksignore +func (i *IPControlMessages) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.HasTimestamp) + stateSinkObject.Save(1, &i.Timestamp) + stateSinkObject.Save(2, &i.HasInq) + stateSinkObject.Save(3, &i.Inq) + stateSinkObject.Save(4, &i.HasTOS) + stateSinkObject.Save(5, &i.TOS) + stateSinkObject.Save(6, &i.HasTClass) + stateSinkObject.Save(7, &i.TClass) + stateSinkObject.Save(8, &i.HasIPPacketInfo) + stateSinkObject.Save(9, &i.PacketInfo) + stateSinkObject.Save(10, &i.OriginalDstAddress) + stateSinkObject.Save(11, &i.SockErr) +} + +func (i *IPControlMessages) afterLoad() {} + +// +checklocksignore +func (i *IPControlMessages) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.HasTimestamp) + stateSourceObject.Load(1, &i.Timestamp) + stateSourceObject.Load(2, &i.HasInq) + stateSourceObject.Load(3, &i.Inq) + stateSourceObject.Load(4, &i.HasTOS) + stateSourceObject.Load(5, &i.TOS) + stateSourceObject.Load(6, &i.HasTClass) + stateSourceObject.Load(7, &i.TClass) + stateSourceObject.Load(8, &i.HasIPPacketInfo) + stateSourceObject.Load(9, &i.PacketInfo) + stateSourceObject.Load(10, &i.OriginalDstAddress) + stateSourceObject.Load(11, &i.SockErr) +} + +func (to *SendReceiveTimeout) StateTypeName() string { + return "pkg/sentry/socket.SendReceiveTimeout" +} + +func (to *SendReceiveTimeout) StateFields() []string { + return []string{ + "send", + "recv", + } +} + +func (to *SendReceiveTimeout) beforeSave() {} + +// +checklocksignore +func (to *SendReceiveTimeout) StateSave(stateSinkObject state.Sink) { + to.beforeSave() + stateSinkObject.Save(0, &to.send) + stateSinkObject.Save(1, &to.recv) +} + +func (to *SendReceiveTimeout) afterLoad() {} + +// +checklocksignore +func (to *SendReceiveTimeout) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &to.send) + stateSourceObject.Load(1, &to.recv) +} + +func init() { + state.Register((*IPControlMessages)(nil)) + state.Register((*SendReceiveTimeout)(nil)) +} diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD deleted file mode 100644 index 5c3cdef6a..000000000 --- a/pkg/sentry/socket/unix/BUILD +++ /dev/null @@ -1,71 +0,0 @@ -load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "socket_refs", - out = "socket_refs.go", - package = "unix", - prefix = "socketOperations", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "SocketOperations", - }, -) - -go_template_instance( - name = "socket_vfs2_refs", - out = "socket_vfs2_refs.go", - package = "unix", - prefix = "socketVFS2", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "SocketVFS2", - }, -) - -go_library( - name = "unix", - srcs = [ - "device.go", - "io.go", - "socket_refs.go", - "socket_vfs2_refs.go", - "unix.go", - "unix_vfs2.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fsimpl/sockfs", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/netstack", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/vfs", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/socket/unix/socket_refs.go b/pkg/sentry/socket/unix/socket_refs.go new file mode 100644 index 000000000..61c6bd17c --- /dev/null +++ b/pkg/sentry/socket/unix/socket_refs.go @@ -0,0 +1,140 @@ +package unix + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const socketOperationsenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var socketOperationsobj *SocketOperations + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type socketOperationsRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *socketOperationsRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *socketOperationsRefs) RefType() string { + return fmt.Sprintf("%T", socketOperationsobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *socketOperationsRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *socketOperationsRefs) LogRefs() bool { + return socketOperationsenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *socketOperationsRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *socketOperationsRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if socketOperationsenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *socketOperationsRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if socketOperationsenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *socketOperationsRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if socketOperationsenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *socketOperationsRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/socket/unix/socket_vfs2_refs.go b/pkg/sentry/socket/unix/socket_vfs2_refs.go new file mode 100644 index 000000000..f6ef581d8 --- /dev/null +++ b/pkg/sentry/socket/unix/socket_vfs2_refs.go @@ -0,0 +1,140 @@ +package unix + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const socketVFS2enableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var socketVFS2obj *SocketVFS2 + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type socketVFS2Refs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *socketVFS2Refs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *socketVFS2Refs) RefType() string { + return fmt.Sprintf("%T", socketVFS2obj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *socketVFS2Refs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *socketVFS2Refs) LogRefs() bool { + return socketVFS2enableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *socketVFS2Refs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *socketVFS2Refs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if socketVFS2enableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *socketVFS2Refs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if socketVFS2enableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *socketVFS2Refs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if socketVFS2enableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *socketVFS2Refs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD deleted file mode 100644 index 0d11bb251..000000000 --- a/pkg/sentry/socket/unix/transport/BUILD +++ /dev/null @@ -1,56 +0,0 @@ -load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "transport_message_list", - out = "transport_message_list.go", - package = "transport", - prefix = "message", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*message", - "Linker": "*message", - }, -) - -go_template_instance( - name = "queue_refs", - out = "queue_refs.go", - package = "transport", - prefix = "queue", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "queue", - }, -) - -go_library( - name = "transport", - srcs = [ - "connectioned.go", - "connectioned_state.go", - "connectionless.go", - "connectionless_state.go", - "queue.go", - "queue_refs.go", - "transport_message_list.go", - "unix.go", - ], - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/ilist", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/sentry/inet", - "//pkg/sync", - "//pkg/syserr", - "//pkg/tcpip", - "//pkg/tcpip/buffer", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/socket/unix/transport/queue_refs.go b/pkg/sentry/socket/unix/transport/queue_refs.go new file mode 100644 index 000000000..9e6f52616 --- /dev/null +++ b/pkg/sentry/socket/unix/transport/queue_refs.go @@ -0,0 +1,140 @@ +package transport + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const queueenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var queueobj *queue + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type queueRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *queueRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *queueRefs) RefType() string { + return fmt.Sprintf("%T", queueobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *queueRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *queueRefs) LogRefs() bool { + return queueenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *queueRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *queueRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if queueenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *queueRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if queueenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *queueRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if queueenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *queueRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/socket/unix/transport/transport_message_list.go b/pkg/sentry/socket/unix/transport/transport_message_list.go new file mode 100644 index 000000000..945163cc1 --- /dev/null +++ b/pkg/sentry/socket/unix/transport/transport_message_list.go @@ -0,0 +1,221 @@ +package transport + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type messageElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (messageElementMapper) linkerFor(elem *message) *message { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type messageList struct { + head *message + tail *message +} + +// Reset resets list l to the empty state. +func (l *messageList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *messageList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *messageList) Front() *message { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *messageList) Back() *message { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *messageList) Len() (count int) { + for e := l.Front(); e != nil; e = (messageElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *messageList) PushFront(e *message) { + linker := messageElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + messageElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *messageList) PushBack(e *message) { + linker := messageElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + messageElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *messageList) PushBackList(m *messageList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + messageElementMapper{}.linkerFor(l.tail).SetNext(m.head) + messageElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *messageList) InsertAfter(b, e *message) { + bLinker := messageElementMapper{}.linkerFor(b) + eLinker := messageElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + messageElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *messageList) InsertBefore(a, e *message) { + aLinker := messageElementMapper{}.linkerFor(a) + eLinker := messageElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + messageElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *messageList) Remove(e *message) { + linker := messageElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + messageElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + messageElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type messageEntry struct { + next *message + prev *message +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *messageEntry) Next() *message { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *messageEntry) Prev() *message { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *messageEntry) SetNext(elem *message) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *messageEntry) SetPrev(elem *message) { + e.prev = elem +} diff --git a/pkg/sentry/socket/unix/transport/transport_state_autogen.go b/pkg/sentry/socket/unix/transport/transport_state_autogen.go new file mode 100644 index 000000000..946b0032e --- /dev/null +++ b/pkg/sentry/socket/unix/transport/transport_state_autogen.go @@ -0,0 +1,398 @@ +// automatically generated by stateify. + +package transport + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (e *connectionedEndpoint) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.connectionedEndpoint" +} + +func (e *connectionedEndpoint) StateFields() []string { + return []string{ + "baseEndpoint", + "id", + "idGenerator", + "stype", + "acceptedChan", + } +} + +func (e *connectionedEndpoint) beforeSave() {} + +// +checklocksignore +func (e *connectionedEndpoint) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + var acceptedChanValue []*connectionedEndpoint = e.saveAcceptedChan() + stateSinkObject.SaveValue(4, acceptedChanValue) + stateSinkObject.Save(0, &e.baseEndpoint) + stateSinkObject.Save(1, &e.id) + stateSinkObject.Save(2, &e.idGenerator) + stateSinkObject.Save(3, &e.stype) +} + +// +checklocksignore +func (e *connectionedEndpoint) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.baseEndpoint) + stateSourceObject.Load(1, &e.id) + stateSourceObject.Load(2, &e.idGenerator) + stateSourceObject.Load(3, &e.stype) + stateSourceObject.LoadValue(4, new([]*connectionedEndpoint), func(y interface{}) { e.loadAcceptedChan(y.([]*connectionedEndpoint)) }) + stateSourceObject.AfterLoad(e.afterLoad) +} + +func (e *connectionlessEndpoint) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.connectionlessEndpoint" +} + +func (e *connectionlessEndpoint) StateFields() []string { + return []string{ + "baseEndpoint", + } +} + +func (e *connectionlessEndpoint) beforeSave() {} + +// +checklocksignore +func (e *connectionlessEndpoint) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.baseEndpoint) +} + +// +checklocksignore +func (e *connectionlessEndpoint) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.baseEndpoint) + stateSourceObject.AfterLoad(e.afterLoad) +} + +func (q *queue) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.queue" +} + +func (q *queue) StateFields() []string { + return []string{ + "queueRefs", + "ReaderQueue", + "WriterQueue", + "closed", + "unread", + "used", + "limit", + "dataList", + } +} + +func (q *queue) beforeSave() {} + +// +checklocksignore +func (q *queue) StateSave(stateSinkObject state.Sink) { + q.beforeSave() + stateSinkObject.Save(0, &q.queueRefs) + stateSinkObject.Save(1, &q.ReaderQueue) + stateSinkObject.Save(2, &q.WriterQueue) + stateSinkObject.Save(3, &q.closed) + stateSinkObject.Save(4, &q.unread) + stateSinkObject.Save(5, &q.used) + stateSinkObject.Save(6, &q.limit) + stateSinkObject.Save(7, &q.dataList) +} + +func (q *queue) afterLoad() {} + +// +checklocksignore +func (q *queue) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &q.queueRefs) + stateSourceObject.Load(1, &q.ReaderQueue) + stateSourceObject.Load(2, &q.WriterQueue) + stateSourceObject.Load(3, &q.closed) + stateSourceObject.Load(4, &q.unread) + stateSourceObject.Load(5, &q.used) + stateSourceObject.Load(6, &q.limit) + stateSourceObject.Load(7, &q.dataList) +} + +func (r *queueRefs) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.queueRefs" +} + +func (r *queueRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *queueRefs) beforeSave() {} + +// +checklocksignore +func (r *queueRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *queueRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (l *messageList) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.messageList" +} + +func (l *messageList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *messageList) beforeSave() {} + +// +checklocksignore +func (l *messageList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *messageList) afterLoad() {} + +// +checklocksignore +func (l *messageList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *messageEntry) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.messageEntry" +} + +func (e *messageEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *messageEntry) beforeSave() {} + +// +checklocksignore +func (e *messageEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *messageEntry) afterLoad() {} + +// +checklocksignore +func (e *messageEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (c *ControlMessages) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.ControlMessages" +} + +func (c *ControlMessages) StateFields() []string { + return []string{ + "Rights", + "Credentials", + } +} + +func (c *ControlMessages) beforeSave() {} + +// +checklocksignore +func (c *ControlMessages) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.Rights) + stateSinkObject.Save(1, &c.Credentials) +} + +func (c *ControlMessages) afterLoad() {} + +// +checklocksignore +func (c *ControlMessages) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.Rights) + stateSourceObject.Load(1, &c.Credentials) +} + +func (m *message) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.message" +} + +func (m *message) StateFields() []string { + return []string{ + "messageEntry", + "Data", + "Control", + "Address", + } +} + +func (m *message) beforeSave() {} + +// +checklocksignore +func (m *message) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.messageEntry) + stateSinkObject.Save(1, &m.Data) + stateSinkObject.Save(2, &m.Control) + stateSinkObject.Save(3, &m.Address) +} + +func (m *message) afterLoad() {} + +// +checklocksignore +func (m *message) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.messageEntry) + stateSourceObject.Load(1, &m.Data) + stateSourceObject.Load(2, &m.Control) + stateSourceObject.Load(3, &m.Address) +} + +func (q *queueReceiver) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.queueReceiver" +} + +func (q *queueReceiver) StateFields() []string { + return []string{ + "readQueue", + } +} + +func (q *queueReceiver) beforeSave() {} + +// +checklocksignore +func (q *queueReceiver) StateSave(stateSinkObject state.Sink) { + q.beforeSave() + stateSinkObject.Save(0, &q.readQueue) +} + +func (q *queueReceiver) afterLoad() {} + +// +checklocksignore +func (q *queueReceiver) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &q.readQueue) +} + +func (q *streamQueueReceiver) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.streamQueueReceiver" +} + +func (q *streamQueueReceiver) StateFields() []string { + return []string{ + "queueReceiver", + "buffer", + "control", + "addr", + } +} + +func (q *streamQueueReceiver) beforeSave() {} + +// +checklocksignore +func (q *streamQueueReceiver) StateSave(stateSinkObject state.Sink) { + q.beforeSave() + stateSinkObject.Save(0, &q.queueReceiver) + stateSinkObject.Save(1, &q.buffer) + stateSinkObject.Save(2, &q.control) + stateSinkObject.Save(3, &q.addr) +} + +func (q *streamQueueReceiver) afterLoad() {} + +// +checklocksignore +func (q *streamQueueReceiver) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &q.queueReceiver) + stateSourceObject.Load(1, &q.buffer) + stateSourceObject.Load(2, &q.control) + stateSourceObject.Load(3, &q.addr) +} + +func (e *connectedEndpoint) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.connectedEndpoint" +} + +func (e *connectedEndpoint) StateFields() []string { + return []string{ + "endpoint", + "writeQueue", + } +} + +func (e *connectedEndpoint) beforeSave() {} + +// +checklocksignore +func (e *connectedEndpoint) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.endpoint) + stateSinkObject.Save(1, &e.writeQueue) +} + +func (e *connectedEndpoint) afterLoad() {} + +// +checklocksignore +func (e *connectedEndpoint) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.endpoint) + stateSourceObject.Load(1, &e.writeQueue) +} + +func (e *baseEndpoint) StateTypeName() string { + return "pkg/sentry/socket/unix/transport.baseEndpoint" +} + +func (e *baseEndpoint) StateFields() []string { + return []string{ + "Queue", + "DefaultSocketOptionsHandler", + "receiver", + "connected", + "path", + "ops", + } +} + +func (e *baseEndpoint) beforeSave() {} + +// +checklocksignore +func (e *baseEndpoint) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.Queue) + stateSinkObject.Save(1, &e.DefaultSocketOptionsHandler) + stateSinkObject.Save(2, &e.receiver) + stateSinkObject.Save(3, &e.connected) + stateSinkObject.Save(4, &e.path) + stateSinkObject.Save(5, &e.ops) +} + +func (e *baseEndpoint) afterLoad() {} + +// +checklocksignore +func (e *baseEndpoint) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.Queue) + stateSourceObject.Load(1, &e.DefaultSocketOptionsHandler) + stateSourceObject.Load(2, &e.receiver) + stateSourceObject.Load(3, &e.connected) + stateSourceObject.Load(4, &e.path) + stateSourceObject.Load(5, &e.ops) +} + +func init() { + state.Register((*connectionedEndpoint)(nil)) + state.Register((*connectionlessEndpoint)(nil)) + state.Register((*queue)(nil)) + state.Register((*queueRefs)(nil)) + state.Register((*messageList)(nil)) + state.Register((*messageEntry)(nil)) + state.Register((*ControlMessages)(nil)) + state.Register((*message)(nil)) + state.Register((*queueReceiver)(nil)) + state.Register((*streamQueueReceiver)(nil)) + state.Register((*connectedEndpoint)(nil)) + state.Register((*baseEndpoint)(nil)) +} diff --git a/pkg/sentry/socket/unix/unix_state_autogen.go b/pkg/sentry/socket/unix/unix_state_autogen.go new file mode 100644 index 000000000..e6169dfad --- /dev/null +++ b/pkg/sentry/socket/unix/unix_state_autogen.go @@ -0,0 +1,168 @@ +// automatically generated by stateify. + +package unix + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *socketOperationsRefs) StateTypeName() string { + return "pkg/sentry/socket/unix.socketOperationsRefs" +} + +func (r *socketOperationsRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *socketOperationsRefs) beforeSave() {} + +// +checklocksignore +func (r *socketOperationsRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *socketOperationsRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (r *socketVFS2Refs) StateTypeName() string { + return "pkg/sentry/socket/unix.socketVFS2Refs" +} + +func (r *socketVFS2Refs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *socketVFS2Refs) beforeSave() {} + +// +checklocksignore +func (r *socketVFS2Refs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *socketVFS2Refs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (s *SocketOperations) StateTypeName() string { + return "pkg/sentry/socket/unix.SocketOperations" +} + +func (s *SocketOperations) StateFields() []string { + return []string{ + "socketOperationsRefs", + "socketOpsCommon", + } +} + +func (s *SocketOperations) beforeSave() {} + +// +checklocksignore +func (s *SocketOperations) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.socketOperationsRefs) + stateSinkObject.Save(1, &s.socketOpsCommon) +} + +func (s *SocketOperations) afterLoad() {} + +// +checklocksignore +func (s *SocketOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.socketOperationsRefs) + stateSourceObject.Load(1, &s.socketOpsCommon) +} + +func (s *socketOpsCommon) StateTypeName() string { + return "pkg/sentry/socket/unix.socketOpsCommon" +} + +func (s *socketOpsCommon) StateFields() []string { + return []string{ + "SendReceiveTimeout", + "ep", + "stype", + "abstractName", + "abstractNamespace", + } +} + +func (s *socketOpsCommon) beforeSave() {} + +// +checklocksignore +func (s *socketOpsCommon) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.SendReceiveTimeout) + stateSinkObject.Save(1, &s.ep) + stateSinkObject.Save(2, &s.stype) + stateSinkObject.Save(3, &s.abstractName) + stateSinkObject.Save(4, &s.abstractNamespace) +} + +func (s *socketOpsCommon) afterLoad() {} + +// +checklocksignore +func (s *socketOpsCommon) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.SendReceiveTimeout) + stateSourceObject.Load(1, &s.ep) + stateSourceObject.Load(2, &s.stype) + stateSourceObject.Load(3, &s.abstractName) + stateSourceObject.Load(4, &s.abstractNamespace) +} + +func (s *SocketVFS2) StateTypeName() string { + return "pkg/sentry/socket/unix.SocketVFS2" +} + +func (s *SocketVFS2) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "LockFD", + "socketVFS2Refs", + "socketOpsCommon", + } +} + +func (s *SocketVFS2) beforeSave() {} + +// +checklocksignore +func (s *SocketVFS2) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.vfsfd) + stateSinkObject.Save(1, &s.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &s.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &s.LockFD) + stateSinkObject.Save(4, &s.socketVFS2Refs) + stateSinkObject.Save(5, &s.socketOpsCommon) +} + +func (s *SocketVFS2) afterLoad() {} + +// +checklocksignore +func (s *SocketVFS2) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.vfsfd) + stateSourceObject.Load(1, &s.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &s.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &s.LockFD) + stateSourceObject.Load(4, &s.socketVFS2Refs) + stateSourceObject.Load(5, &s.socketOpsCommon) +} + +func init() { + state.Register((*socketOperationsRefs)(nil)) + state.Register((*socketVFS2Refs)(nil)) + state.Register((*SocketOperations)(nil)) + state.Register((*socketOpsCommon)(nil)) + state.Register((*SocketVFS2)(nil)) +} diff --git a/pkg/sentry/state/BUILD b/pkg/sentry/state/BUILD deleted file mode 100644 index 7f02807c5..000000000 --- a/pkg/sentry/state/BUILD +++ /dev/null @@ -1,26 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "state", - srcs = [ - "state.go", - "state_metadata.go", - "state_unsafe.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/log", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/time", - "//pkg/sentry/vfs", - "//pkg/sentry/watchdog", - "//pkg/state/statefile", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/state/state_state_autogen.go b/pkg/sentry/state/state_state_autogen.go new file mode 100644 index 000000000..305f03f52 --- /dev/null +++ b/pkg/sentry/state/state_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build go1.1 +// +build go1.1 + +package state diff --git a/pkg/sentry/state/state_unsafe_state_autogen.go b/pkg/sentry/state/state_unsafe_state_autogen.go new file mode 100644 index 000000000..6c2b29632 --- /dev/null +++ b/pkg/sentry/state/state_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package state diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD deleted file mode 100644 index 369541c7a..000000000 --- a/pkg/sentry/strace/BUILD +++ /dev/null @@ -1,46 +0,0 @@ -load("//tools:defs.bzl", "go_library", "proto_library") - -package(licenses = ["notice"]) - -go_library( - name = "strace", - srcs = [ - "capability.go", - "clone.go", - "epoll.go", - "futex.go", - "linux64_amd64.go", - "linux64_arm64.go", - "mmap.go", - "open.go", - "poll.go", - "ptrace.go", - "select.go", - "signal.go", - "socket.go", - "strace.go", - "syscalls.go", - ], - visibility = ["//:sandbox"], - deps = [ - ":strace_go_proto", - "//pkg/abi", - "//pkg/abi/linux", - "//pkg/bits", - "//pkg/eventchannel", - "//pkg/hostarch", - "//pkg/marshal/primitive", - "//pkg/seccomp", - "//pkg/sentry/arch", - "//pkg/sentry/kernel", - "//pkg/sentry/socket", - "//pkg/sentry/socket/netlink", - "//pkg/sentry/syscalls/linux", - ], -) - -proto_library( - name = "strace", - srcs = ["strace.proto"], - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/strace/strace.proto b/pkg/sentry/strace/strace.proto deleted file mode 100644 index 906c52c51..000000000 --- a/pkg/sentry/strace/strace.proto +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -message Strace { - // Process name that made the syscall. - string process = 1; - - // Syscall function name. - string function = 2; - - // List of syscall arguments formatted as strings. - repeated string args = 3; - - oneof info { - StraceEnter enter = 4; - StraceExit exit = 5; - } -} - -message StraceEnter {} - -message StraceExit { - // Return value formatted as string. - string return = 1; - - // Formatted error string in case syscall failed. - string error = 2; - - // Value of errno upon syscall exit. - int64 err_no = 3; // errno is a macro and gets expanded :-( - - // Time elapsed between syscall enter and exit. - int64 elapsed_ns = 4; -} diff --git a/pkg/sentry/strace/strace_amd64_state_autogen.go b/pkg/sentry/strace/strace_amd64_state_autogen.go new file mode 100644 index 000000000..2a3d1077f --- /dev/null +++ b/pkg/sentry/strace/strace_amd64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build amd64 +// +build amd64 + +package strace diff --git a/pkg/sentry/strace/strace_arm64_state_autogen.go b/pkg/sentry/strace/strace_arm64_state_autogen.go new file mode 100644 index 000000000..c64c9df57 --- /dev/null +++ b/pkg/sentry/strace/strace_arm64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 +// +build arm64 + +package strace diff --git a/pkg/sentry/strace/strace_go_proto/strace.pb.go b/pkg/sentry/strace/strace_go_proto/strace.pb.go new file mode 100644 index 000000000..f3d5c1108 --- /dev/null +++ b/pkg/sentry/strace/strace_go_proto/strace.pb.go @@ -0,0 +1,362 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.25.0 +// protoc v3.13.0 +// source: pkg/sentry/strace/strace.proto + +package gvisor + +import ( + proto "github.com/golang/protobuf/proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion that a sufficiently up-to-date version +// of the legacy proto package is being used. +const _ = proto.ProtoPackageIsVersion4 + +type Strace struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Process string `protobuf:"bytes,1,opt,name=process,proto3" json:"process,omitempty"` + Function string `protobuf:"bytes,2,opt,name=function,proto3" json:"function,omitempty"` + Args []string `protobuf:"bytes,3,rep,name=args,proto3" json:"args,omitempty"` + // Types that are assignable to Info: + // *Strace_Enter + // *Strace_Exit + Info isStrace_Info `protobuf_oneof:"info"` +} + +func (x *Strace) Reset() { + *x = Strace{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_strace_strace_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Strace) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Strace) ProtoMessage() {} + +func (x *Strace) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_strace_strace_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Strace.ProtoReflect.Descriptor instead. +func (*Strace) Descriptor() ([]byte, []int) { + return file_pkg_sentry_strace_strace_proto_rawDescGZIP(), []int{0} +} + +func (x *Strace) GetProcess() string { + if x != nil { + return x.Process + } + return "" +} + +func (x *Strace) GetFunction() string { + if x != nil { + return x.Function + } + return "" +} + +func (x *Strace) GetArgs() []string { + if x != nil { + return x.Args + } + return nil +} + +func (m *Strace) GetInfo() isStrace_Info { + if m != nil { + return m.Info + } + return nil +} + +func (x *Strace) GetEnter() *StraceEnter { + if x, ok := x.GetInfo().(*Strace_Enter); ok { + return x.Enter + } + return nil +} + +func (x *Strace) GetExit() *StraceExit { + if x, ok := x.GetInfo().(*Strace_Exit); ok { + return x.Exit + } + return nil +} + +type isStrace_Info interface { + isStrace_Info() +} + +type Strace_Enter struct { + Enter *StraceEnter `protobuf:"bytes,4,opt,name=enter,proto3,oneof"` +} + +type Strace_Exit struct { + Exit *StraceExit `protobuf:"bytes,5,opt,name=exit,proto3,oneof"` +} + +func (*Strace_Enter) isStrace_Info() {} + +func (*Strace_Exit) isStrace_Info() {} + +type StraceEnter struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *StraceEnter) Reset() { + *x = StraceEnter{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_strace_strace_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StraceEnter) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StraceEnter) ProtoMessage() {} + +func (x *StraceEnter) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_strace_strace_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StraceEnter.ProtoReflect.Descriptor instead. +func (*StraceEnter) Descriptor() ([]byte, []int) { + return file_pkg_sentry_strace_strace_proto_rawDescGZIP(), []int{1} +} + +type StraceExit struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Return string `protobuf:"bytes,1,opt,name=return,proto3" json:"return,omitempty"` + Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` + ErrNo int64 `protobuf:"varint,3,opt,name=err_no,json=errNo,proto3" json:"err_no,omitempty"` + ElapsedNs int64 `protobuf:"varint,4,opt,name=elapsed_ns,json=elapsedNs,proto3" json:"elapsed_ns,omitempty"` +} + +func (x *StraceExit) Reset() { + *x = StraceExit{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_strace_strace_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StraceExit) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StraceExit) ProtoMessage() {} + +func (x *StraceExit) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_strace_strace_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StraceExit.ProtoReflect.Descriptor instead. +func (*StraceExit) Descriptor() ([]byte, []int) { + return file_pkg_sentry_strace_strace_proto_rawDescGZIP(), []int{2} +} + +func (x *StraceExit) GetReturn() string { + if x != nil { + return x.Return + } + return "" +} + +func (x *StraceExit) GetError() string { + if x != nil { + return x.Error + } + return "" +} + +func (x *StraceExit) GetErrNo() int64 { + if x != nil { + return x.ErrNo + } + return 0 +} + +func (x *StraceExit) GetElapsedNs() int64 { + if x != nil { + return x.ElapsedNs + } + return 0 +} + +var File_pkg_sentry_strace_strace_proto protoreflect.FileDescriptor + +var file_pkg_sentry_strace_strace_proto_rawDesc = []byte{ + 0x0a, 0x1e, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x2f, 0x73, 0x74, 0x72, + 0x61, 0x63, 0x65, 0x2f, 0x73, 0x74, 0x72, 0x61, 0x63, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x12, 0x06, 0x67, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x22, 0xb1, 0x01, 0x0a, 0x06, 0x53, 0x74, 0x72, + 0x61, 0x63, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x70, 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x70, 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x12, 0x1a, 0x0a, + 0x08, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x08, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x61, 0x72, 0x67, + 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x61, 0x72, 0x67, 0x73, 0x12, 0x2b, 0x0a, + 0x05, 0x65, 0x6e, 0x74, 0x65, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x67, + 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x53, 0x74, 0x72, 0x61, 0x63, 0x65, 0x45, 0x6e, 0x74, 0x65, + 0x72, 0x48, 0x00, 0x52, 0x05, 0x65, 0x6e, 0x74, 0x65, 0x72, 0x12, 0x28, 0x0a, 0x04, 0x65, 0x78, + 0x69, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x67, 0x76, 0x69, 0x73, 0x6f, + 0x72, 0x2e, 0x53, 0x74, 0x72, 0x61, 0x63, 0x65, 0x45, 0x78, 0x69, 0x74, 0x48, 0x00, 0x52, 0x04, + 0x65, 0x78, 0x69, 0x74, 0x42, 0x06, 0x0a, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x22, 0x0d, 0x0a, 0x0b, + 0x53, 0x74, 0x72, 0x61, 0x63, 0x65, 0x45, 0x6e, 0x74, 0x65, 0x72, 0x22, 0x70, 0x0a, 0x0a, 0x53, + 0x74, 0x72, 0x61, 0x63, 0x65, 0x45, 0x78, 0x69, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x72, 0x65, 0x74, + 0x75, 0x72, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x72, 0x65, 0x74, 0x75, 0x72, + 0x6e, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x15, 0x0a, 0x06, 0x65, 0x72, 0x72, 0x5f, 0x6e, + 0x6f, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x65, 0x72, 0x72, 0x4e, 0x6f, 0x12, 0x1d, + 0x0a, 0x0a, 0x65, 0x6c, 0x61, 0x70, 0x73, 0x65, 0x64, 0x5f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x09, 0x65, 0x6c, 0x61, 0x70, 0x73, 0x65, 0x64, 0x4e, 0x73, 0x62, 0x06, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_pkg_sentry_strace_strace_proto_rawDescOnce sync.Once + file_pkg_sentry_strace_strace_proto_rawDescData = file_pkg_sentry_strace_strace_proto_rawDesc +) + +func file_pkg_sentry_strace_strace_proto_rawDescGZIP() []byte { + file_pkg_sentry_strace_strace_proto_rawDescOnce.Do(func() { + file_pkg_sentry_strace_strace_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_sentry_strace_strace_proto_rawDescData) + }) + return file_pkg_sentry_strace_strace_proto_rawDescData +} + +var file_pkg_sentry_strace_strace_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_pkg_sentry_strace_strace_proto_goTypes = []interface{}{ + (*Strace)(nil), // 0: gvisor.Strace + (*StraceEnter)(nil), // 1: gvisor.StraceEnter + (*StraceExit)(nil), // 2: gvisor.StraceExit +} +var file_pkg_sentry_strace_strace_proto_depIdxs = []int32{ + 1, // 0: gvisor.Strace.enter:type_name -> gvisor.StraceEnter + 2, // 1: gvisor.Strace.exit:type_name -> gvisor.StraceExit + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { file_pkg_sentry_strace_strace_proto_init() } +func file_pkg_sentry_strace_strace_proto_init() { + if File_pkg_sentry_strace_strace_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_pkg_sentry_strace_strace_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Strace); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_sentry_strace_strace_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StraceEnter); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_sentry_strace_strace_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StraceExit); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_pkg_sentry_strace_strace_proto_msgTypes[0].OneofWrappers = []interface{}{ + (*Strace_Enter)(nil), + (*Strace_Exit)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pkg_sentry_strace_strace_proto_rawDesc, + NumEnums: 0, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_pkg_sentry_strace_strace_proto_goTypes, + DependencyIndexes: file_pkg_sentry_strace_strace_proto_depIdxs, + MessageInfos: file_pkg_sentry_strace_strace_proto_msgTypes, + }.Build() + File_pkg_sentry_strace_strace_proto = out.File + file_pkg_sentry_strace_strace_proto_rawDesc = nil + file_pkg_sentry_strace_strace_proto_goTypes = nil + file_pkg_sentry_strace_strace_proto_depIdxs = nil +} diff --git a/pkg/sentry/strace/strace_state_autogen.go b/pkg/sentry/strace/strace_state_autogen.go new file mode 100644 index 000000000..33f6a7a54 --- /dev/null +++ b/pkg/sentry/strace/strace_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package strace diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD deleted file mode 100644 index f2c55588f..000000000 --- a/pkg/sentry/syscalls/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "syscalls", - srcs = [ - "epoll.go", - "syscalls.go", - ], - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/errors/linuxerr", - "//pkg/sentry/arch", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/epoll", - "//pkg/sentry/kernel/time", - "//pkg/syserror", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD deleted file mode 100644 index ccccce6a9..000000000 --- a/pkg/sentry/syscalls/linux/BUILD +++ /dev/null @@ -1,111 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "linux", - srcs = [ - "error.go", - "flags.go", - "linux64.go", - "sigset.go", - "sys_aio.go", - "sys_capability.go", - "sys_clone_amd64.go", - "sys_clone_arm64.go", - "sys_epoll.go", - "sys_eventfd.go", - "sys_file.go", - "sys_futex.go", - "sys_getdents.go", - "sys_identity.go", - "sys_inotify.go", - "sys_lseek.go", - "sys_membarrier.go", - "sys_mempolicy.go", - "sys_mmap.go", - "sys_mount.go", - "sys_msgqueue.go", - "sys_pipe.go", - "sys_poll.go", - "sys_prctl.go", - "sys_random.go", - "sys_read.go", - "sys_rlimit.go", - "sys_rseq.go", - "sys_rusage.go", - "sys_sched.go", - "sys_seccomp.go", - "sys_sem.go", - "sys_shm.go", - "sys_signal.go", - "sys_socket.go", - "sys_splice.go", - "sys_stat.go", - "sys_stat_amd64.go", - "sys_stat_arm64.go", - "sys_sync.go", - "sys_sysinfo.go", - "sys_syslog.go", - "sys_thread.go", - "sys_time.go", - "sys_timer.go", - "sys_timerfd.go", - "sys_tls_amd64.go", - "sys_tls_arm64.go", - "sys_utsname.go", - "sys_write.go", - "sys_xattr.go", - "timespec.go", - ], - marshal = True, - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi", - "//pkg/abi/linux", - "//pkg/bpf", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/metric", - "//pkg/rand", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fs/timerfd", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/fsbridge", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/epoll", - "//pkg/sentry/kernel/eventfd", - "//pkg/sentry/kernel/fasync", - "//pkg/sentry/kernel/ipc", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/sched", - "//pkg/sentry/kernel/shm", - "//pkg/sentry/kernel/signalfd", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/loader", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/socket", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/syscalls", - "//pkg/sentry/usage", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/syscalls/linux/linux_abi_autogen_unsafe.go b/pkg/sentry/syscalls/linux/linux_abi_autogen_unsafe.go new file mode 100644 index 000000000..409927919 --- /dev/null +++ b/pkg/sentry/syscalls/linux/linux_abi_autogen_unsafe.go @@ -0,0 +1,712 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package linux + +import ( + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal" + "io" + "reflect" + "runtime" + "unsafe" +) + +// Marshallable types used by this file. +var _ marshal.Marshallable = (*MessageHeader64)(nil) +var _ marshal.Marshallable = (*SchedParam)(nil) +var _ marshal.Marshallable = (*direntHdr)(nil) +var _ marshal.Marshallable = (*multipleMessageHeader64)(nil) +var _ marshal.Marshallable = (*oldDirentHdr)(nil) +var _ marshal.Marshallable = (*rlimit64)(nil) +var _ marshal.Marshallable = (*userSockFprog)(nil) + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (d *direntHdr) SizeBytes() int { + return 1 + + (*oldDirentHdr)(nil).SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (d *direntHdr) MarshalBytes(dst []byte) { + d.OldHdr.MarshalBytes(dst[:d.OldHdr.SizeBytes()]) + dst = dst[d.OldHdr.SizeBytes():] + dst[0] = byte(d.Typ) + dst = dst[1:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (d *direntHdr) UnmarshalBytes(src []byte) { + d.OldHdr.UnmarshalBytes(src[:d.OldHdr.SizeBytes()]) + src = src[d.OldHdr.SizeBytes():] + d.Typ = uint8(src[0]) + src = src[1:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (d *direntHdr) Packed() bool { + return false +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (d *direntHdr) MarshalUnsafe(dst []byte) { + // Type direntHdr doesn't have a packed layout in memory, fallback to MarshalBytes. + d.MarshalBytes(dst) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (d *direntHdr) UnmarshalUnsafe(src []byte) { + // Type direntHdr doesn't have a packed layout in memory, fallback to UnmarshalBytes. + d.UnmarshalBytes(src) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (d *direntHdr) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Type direntHdr doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(d.SizeBytes()) // escapes: okay. + d.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (d *direntHdr) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return d.CopyOutN(cc, addr, d.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (d *direntHdr) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Type direntHdr doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(d.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + d.UnmarshalBytes(buf) // escapes: fallback. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (d *direntHdr) WriteTo(writer io.Writer) (int64, error) { + // Type direntHdr doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, d.SizeBytes()) + d.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (o *oldDirentHdr) SizeBytes() int { + return 18 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (o *oldDirentHdr) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(o.Ino)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(o.Off)) + dst = dst[8:] + hostarch.ByteOrder.PutUint16(dst[:2], uint16(o.Reclen)) + dst = dst[2:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (o *oldDirentHdr) UnmarshalBytes(src []byte) { + o.Ino = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + o.Off = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + o.Reclen = uint16(hostarch.ByteOrder.Uint16(src[:2])) + src = src[2:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (o *oldDirentHdr) Packed() bool { + return false +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (o *oldDirentHdr) MarshalUnsafe(dst []byte) { + // Type oldDirentHdr doesn't have a packed layout in memory, fallback to MarshalBytes. + o.MarshalBytes(dst) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (o *oldDirentHdr) UnmarshalUnsafe(src []byte) { + // Type oldDirentHdr doesn't have a packed layout in memory, fallback to UnmarshalBytes. + o.UnmarshalBytes(src) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (o *oldDirentHdr) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Type oldDirentHdr doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(o.SizeBytes()) // escapes: okay. + o.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (o *oldDirentHdr) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return o.CopyOutN(cc, addr, o.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (o *oldDirentHdr) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Type oldDirentHdr doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(o.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + o.UnmarshalBytes(buf) // escapes: fallback. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (o *oldDirentHdr) WriteTo(writer io.Writer) (int64, error) { + // Type oldDirentHdr doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, o.SizeBytes()) + o.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (r *rlimit64) SizeBytes() int { + return 16 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (r *rlimit64) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(r.Cur)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(r.Max)) + dst = dst[8:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (r *rlimit64) UnmarshalBytes(src []byte) { + r.Cur = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + r.Max = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (r *rlimit64) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (r *rlimit64) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(r), uintptr(r.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (r *rlimit64) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(r), unsafe.Pointer(&src[0]), uintptr(r.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (r *rlimit64) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(r))) + hdr.Len = r.SizeBytes() + hdr.Cap = r.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that r + // must live until the use above. + runtime.KeepAlive(r) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (r *rlimit64) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return r.CopyOutN(cc, addr, r.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (r *rlimit64) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(r))) + hdr.Len = r.SizeBytes() + hdr.Cap = r.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that r + // must live until the use above. + runtime.KeepAlive(r) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (r *rlimit64) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(r))) + hdr.Len = r.SizeBytes() + hdr.Cap = r.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that r + // must live until the use above. + runtime.KeepAlive(r) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (s *SchedParam) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (s *SchedParam) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(s.schedPriority)) + dst = dst[4:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (s *SchedParam) UnmarshalBytes(src []byte) { + s.schedPriority = int32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (s *SchedParam) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (s *SchedParam) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(s), uintptr(s.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (s *SchedParam) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(s), unsafe.Pointer(&src[0]), uintptr(s.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (s *SchedParam) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (s *SchedParam) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return s.CopyOutN(cc, addr, s.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (s *SchedParam) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (s *SchedParam) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (u *userSockFprog) SizeBytes() int { + return 10 + + 1*6 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (u *userSockFprog) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint16(dst[:2], uint16(u.Len)) + dst = dst[2:] + // Padding: dst[:sizeof(byte)*6] ~= [6]byte{0} + dst = dst[1*(6):] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(u.Filter)) + dst = dst[8:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (u *userSockFprog) UnmarshalBytes(src []byte) { + u.Len = uint16(hostarch.ByteOrder.Uint16(src[:2])) + src = src[2:] + // Padding: ~ copy([6]byte(u._), src[:sizeof(byte)*6]) + src = src[1*(6):] + u.Filter = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (u *userSockFprog) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (u *userSockFprog) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(u), uintptr(u.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (u *userSockFprog) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(u), unsafe.Pointer(&src[0]), uintptr(u.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (u *userSockFprog) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(u))) + hdr.Len = u.SizeBytes() + hdr.Cap = u.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that u + // must live until the use above. + runtime.KeepAlive(u) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (u *userSockFprog) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return u.CopyOutN(cc, addr, u.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (u *userSockFprog) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(u))) + hdr.Len = u.SizeBytes() + hdr.Cap = u.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that u + // must live until the use above. + runtime.KeepAlive(u) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (u *userSockFprog) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(u))) + hdr.Len = u.SizeBytes() + hdr.Cap = u.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that u + // must live until the use above. + runtime.KeepAlive(u) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (m *MessageHeader64) SizeBytes() int { + return 56 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (m *MessageHeader64) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.Name)) + dst = dst[8:] + hostarch.ByteOrder.PutUint32(dst[:4], uint32(m.NameLen)) + dst = dst[4:] + // Padding: dst[:sizeof(uint32)] ~= uint32(0) + dst = dst[4:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.Iov)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.IovLen)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.Control)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.ControlLen)) + dst = dst[8:] + hostarch.ByteOrder.PutUint32(dst[:4], uint32(m.Flags)) + dst = dst[4:] + // Padding: dst[:sizeof(int32)] ~= int32(0) + dst = dst[4:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (m *MessageHeader64) UnmarshalBytes(src []byte) { + m.Name = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + m.NameLen = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + // Padding: var _ uint32 ~= src[:sizeof(uint32)] + src = src[4:] + m.Iov = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + m.IovLen = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + m.Control = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + m.ControlLen = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + m.Flags = int32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + // Padding: var _ int32 ~= src[:sizeof(int32)] + src = src[4:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (m *MessageHeader64) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (m *MessageHeader64) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(m), uintptr(m.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (m *MessageHeader64) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(m), unsafe.Pointer(&src[0]), uintptr(m.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (m *MessageHeader64) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (m *MessageHeader64) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return m.CopyOutN(cc, addr, m.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (m *MessageHeader64) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (m *MessageHeader64) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (m *multipleMessageHeader64) SizeBytes() int { + return 8 + + (*MessageHeader64)(nil).SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (m *multipleMessageHeader64) MarshalBytes(dst []byte) { + m.msgHdr.MarshalBytes(dst[:m.msgHdr.SizeBytes()]) + dst = dst[m.msgHdr.SizeBytes():] + hostarch.ByteOrder.PutUint32(dst[:4], uint32(m.msgLen)) + dst = dst[4:] + // Padding: dst[:sizeof(int32)] ~= int32(0) + dst = dst[4:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (m *multipleMessageHeader64) UnmarshalBytes(src []byte) { + m.msgHdr.UnmarshalBytes(src[:m.msgHdr.SizeBytes()]) + src = src[m.msgHdr.SizeBytes():] + m.msgLen = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + // Padding: var _ int32 ~= src[:sizeof(int32)] + src = src[4:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (m *multipleMessageHeader64) Packed() bool { + return m.msgHdr.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (m *multipleMessageHeader64) MarshalUnsafe(dst []byte) { + if m.msgHdr.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(m), uintptr(m.SizeBytes())) + } else { + // Type multipleMessageHeader64 doesn't have a packed layout in memory, fallback to MarshalBytes. + m.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (m *multipleMessageHeader64) UnmarshalUnsafe(src []byte) { + if m.msgHdr.Packed() { + gohacks.Memmove(unsafe.Pointer(m), unsafe.Pointer(&src[0]), uintptr(m.SizeBytes())) + } else { + // Type multipleMessageHeader64 doesn't have a packed layout in memory, fallback to UnmarshalBytes. + m.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (m *multipleMessageHeader64) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !m.msgHdr.Packed() { + // Type multipleMessageHeader64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(m.SizeBytes()) // escapes: okay. + m.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (m *multipleMessageHeader64) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return m.CopyOutN(cc, addr, m.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (m *multipleMessageHeader64) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !m.msgHdr.Packed() { + // Type multipleMessageHeader64 doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(m.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + m.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (m *multipleMessageHeader64) WriteTo(writer io.Writer) (int64, error) { + if !m.msgHdr.Packed() { + // Type multipleMessageHeader64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, m.SizeBytes()) + m.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/sentry/syscalls/linux/linux_amd64_abi_autogen_unsafe.go b/pkg/sentry/syscalls/linux/linux_amd64_abi_autogen_unsafe.go new file mode 100644 index 000000000..84d67731a --- /dev/null +++ b/pkg/sentry/syscalls/linux/linux_amd64_abi_autogen_unsafe.go @@ -0,0 +1,16 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build amd64 && amd64 && amd64 +// +build amd64,amd64,amd64 + +package linux + +import ( +) + diff --git a/pkg/sentry/syscalls/linux/linux_amd64_state_autogen.go b/pkg/sentry/syscalls/linux/linux_amd64_state_autogen.go new file mode 100644 index 000000000..19f7a855e --- /dev/null +++ b/pkg/sentry/syscalls/linux/linux_amd64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build amd64 && amd64 && amd64 +// +build amd64,amd64,amd64 + +package linux diff --git a/pkg/sentry/syscalls/linux/linux_arm64_abi_autogen_unsafe.go b/pkg/sentry/syscalls/linux/linux_arm64_abi_autogen_unsafe.go new file mode 100644 index 000000000..6bbdf8d79 --- /dev/null +++ b/pkg/sentry/syscalls/linux/linux_arm64_abi_autogen_unsafe.go @@ -0,0 +1,16 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build arm64 && arm64 && arm64 +// +build arm64,arm64,arm64 + +package linux + +import ( +) + diff --git a/pkg/sentry/syscalls/linux/linux_arm64_state_autogen.go b/pkg/sentry/syscalls/linux/linux_arm64_state_autogen.go new file mode 100644 index 000000000..ecc524560 --- /dev/null +++ b/pkg/sentry/syscalls/linux/linux_arm64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 && arm64 && arm64 +// +build arm64,arm64,arm64 + +package linux diff --git a/pkg/sentry/syscalls/linux/linux_state_autogen.go b/pkg/sentry/syscalls/linux/linux_state_autogen.go new file mode 100644 index 000000000..cc577e4ac --- /dev/null +++ b/pkg/sentry/syscalls/linux/linux_state_autogen.go @@ -0,0 +1,112 @@ +// automatically generated by stateify. + +package linux + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (f *futexWaitRestartBlock) StateTypeName() string { + return "pkg/sentry/syscalls/linux.futexWaitRestartBlock" +} + +func (f *futexWaitRestartBlock) StateFields() []string { + return []string{ + "duration", + "addr", + "private", + "val", + "mask", + } +} + +func (f *futexWaitRestartBlock) beforeSave() {} + +// +checklocksignore +func (f *futexWaitRestartBlock) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.duration) + stateSinkObject.Save(1, &f.addr) + stateSinkObject.Save(2, &f.private) + stateSinkObject.Save(3, &f.val) + stateSinkObject.Save(4, &f.mask) +} + +func (f *futexWaitRestartBlock) afterLoad() {} + +// +checklocksignore +func (f *futexWaitRestartBlock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.duration) + stateSourceObject.Load(1, &f.addr) + stateSourceObject.Load(2, &f.private) + stateSourceObject.Load(3, &f.val) + stateSourceObject.Load(4, &f.mask) +} + +func (p *pollRestartBlock) StateTypeName() string { + return "pkg/sentry/syscalls/linux.pollRestartBlock" +} + +func (p *pollRestartBlock) StateFields() []string { + return []string{ + "pfdAddr", + "nfds", + "timeout", + } +} + +func (p *pollRestartBlock) beforeSave() {} + +// +checklocksignore +func (p *pollRestartBlock) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.pfdAddr) + stateSinkObject.Save(1, &p.nfds) + stateSinkObject.Save(2, &p.timeout) +} + +func (p *pollRestartBlock) afterLoad() {} + +// +checklocksignore +func (p *pollRestartBlock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.pfdAddr) + stateSourceObject.Load(1, &p.nfds) + stateSourceObject.Load(2, &p.timeout) +} + +func (n *clockNanosleepRestartBlock) StateTypeName() string { + return "pkg/sentry/syscalls/linux.clockNanosleepRestartBlock" +} + +func (n *clockNanosleepRestartBlock) StateFields() []string { + return []string{ + "c", + "end", + "rem", + } +} + +func (n *clockNanosleepRestartBlock) beforeSave() {} + +// +checklocksignore +func (n *clockNanosleepRestartBlock) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.c) + stateSinkObject.Save(1, &n.end) + stateSinkObject.Save(2, &n.rem) +} + +func (n *clockNanosleepRestartBlock) afterLoad() {} + +// +checklocksignore +func (n *clockNanosleepRestartBlock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.c) + stateSourceObject.Load(1, &n.end) + stateSourceObject.Load(2, &n.rem) +} + +func init() { + state.Register((*futexWaitRestartBlock)(nil)) + state.Register((*pollRestartBlock)(nil)) + state.Register((*clockNanosleepRestartBlock)(nil)) +} diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD deleted file mode 100644 index a73f096ff..000000000 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ /dev/null @@ -1,80 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "vfs2", - srcs = [ - "aio.go", - "epoll.go", - "eventfd.go", - "execve.go", - "fd.go", - "filesystem.go", - "fscontext.go", - "getdents.go", - "inotify.go", - "ioctl.go", - "lock.go", - "memfd.go", - "mmap.go", - "mount.go", - "path.go", - "pipe.go", - "poll.go", - "read_write.go", - "setstat.go", - "signal.go", - "socket.go", - "splice.go", - "stat.go", - "stat_amd64.go", - "stat_arm64.go", - "sync.go", - "timerfd.go", - "vfs2.go", - "xattr.go", - ], - marshal = True, - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/bits", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fspath", - "//pkg/gohacks", - "//pkg/hostarch", - "//pkg/log", - "//pkg/marshal", - "//pkg/marshal/primitive", - "//pkg/sentry/arch", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fsbridge", - "//pkg/sentry/fsimpl/eventfd", - "//pkg/sentry/fsimpl/pipefs", - "//pkg/sentry/fsimpl/signalfd", - "//pkg/sentry/fsimpl/timerfd", - "//pkg/sentry/fsimpl/tmpfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/fasync", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/loader", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/socket", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/syscalls", - "//pkg/sentry/syscalls/linux", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2_abi_autogen_unsafe.go b/pkg/sentry/syscalls/linux/vfs2/vfs2_abi_autogen_unsafe.go new file mode 100644 index 000000000..01b0d465f --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2_abi_autogen_unsafe.go @@ -0,0 +1,366 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal" + "io" + "reflect" + "runtime" + "unsafe" +) + +// Marshallable types used by this file. +var _ marshal.Marshallable = (*MessageHeader64)(nil) +var _ marshal.Marshallable = (*multipleMessageHeader64)(nil) +var _ marshal.Marshallable = (*sigSetWithSize)(nil) + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (s *sigSetWithSize) SizeBytes() int { + return 16 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (s *sigSetWithSize) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.sigsetAddr)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(s.sizeofSigset)) + dst = dst[8:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (s *sigSetWithSize) UnmarshalBytes(src []byte) { + s.sigsetAddr = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + s.sizeofSigset = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (s *sigSetWithSize) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (s *sigSetWithSize) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(s), uintptr(s.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (s *sigSetWithSize) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(s), unsafe.Pointer(&src[0]), uintptr(s.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (s *sigSetWithSize) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (s *sigSetWithSize) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return s.CopyOutN(cc, addr, s.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (s *sigSetWithSize) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (s *sigSetWithSize) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (m *MessageHeader64) SizeBytes() int { + return 56 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (m *MessageHeader64) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.Name)) + dst = dst[8:] + hostarch.ByteOrder.PutUint32(dst[:4], uint32(m.NameLen)) + dst = dst[4:] + // Padding: dst[:sizeof(uint32)] ~= uint32(0) + dst = dst[4:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.Iov)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.IovLen)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.Control)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.ControlLen)) + dst = dst[8:] + hostarch.ByteOrder.PutUint32(dst[:4], uint32(m.Flags)) + dst = dst[4:] + // Padding: dst[:sizeof(int32)] ~= int32(0) + dst = dst[4:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (m *MessageHeader64) UnmarshalBytes(src []byte) { + m.Name = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + m.NameLen = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + // Padding: var _ uint32 ~= src[:sizeof(uint32)] + src = src[4:] + m.Iov = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + m.IovLen = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + m.Control = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + m.ControlLen = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + m.Flags = int32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + // Padding: var _ int32 ~= src[:sizeof(int32)] + src = src[4:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (m *MessageHeader64) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (m *MessageHeader64) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(m), uintptr(m.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (m *MessageHeader64) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(m), unsafe.Pointer(&src[0]), uintptr(m.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (m *MessageHeader64) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (m *MessageHeader64) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return m.CopyOutN(cc, addr, m.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (m *MessageHeader64) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (m *MessageHeader64) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (m *multipleMessageHeader64) SizeBytes() int { + return 8 + + (*MessageHeader64)(nil).SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (m *multipleMessageHeader64) MarshalBytes(dst []byte) { + m.msgHdr.MarshalBytes(dst[:m.msgHdr.SizeBytes()]) + dst = dst[m.msgHdr.SizeBytes():] + hostarch.ByteOrder.PutUint32(dst[:4], uint32(m.msgLen)) + dst = dst[4:] + // Padding: dst[:sizeof(int32)] ~= int32(0) + dst = dst[4:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (m *multipleMessageHeader64) UnmarshalBytes(src []byte) { + m.msgHdr.UnmarshalBytes(src[:m.msgHdr.SizeBytes()]) + src = src[m.msgHdr.SizeBytes():] + m.msgLen = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + // Padding: var _ int32 ~= src[:sizeof(int32)] + src = src[4:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (m *multipleMessageHeader64) Packed() bool { + return m.msgHdr.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (m *multipleMessageHeader64) MarshalUnsafe(dst []byte) { + if m.msgHdr.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(m), uintptr(m.SizeBytes())) + } else { + // Type multipleMessageHeader64 doesn't have a packed layout in memory, fallback to MarshalBytes. + m.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (m *multipleMessageHeader64) UnmarshalUnsafe(src []byte) { + if m.msgHdr.Packed() { + gohacks.Memmove(unsafe.Pointer(m), unsafe.Pointer(&src[0]), uintptr(m.SizeBytes())) + } else { + // Type multipleMessageHeader64 doesn't have a packed layout in memory, fallback to UnmarshalBytes. + m.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (m *multipleMessageHeader64) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !m.msgHdr.Packed() { + // Type multipleMessageHeader64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(m.SizeBytes()) // escapes: okay. + m.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (m *multipleMessageHeader64) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return m.CopyOutN(cc, addr, m.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (m *multipleMessageHeader64) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !m.msgHdr.Packed() { + // Type multipleMessageHeader64 doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(m.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + m.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (m *multipleMessageHeader64) WriteTo(writer io.Writer) (int64, error) { + if !m.msgHdr.Packed() { + // Type multipleMessageHeader64 doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, m.SizeBytes()) + m.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2_amd64_abi_autogen_unsafe.go b/pkg/sentry/syscalls/linux/vfs2/vfs2_amd64_abi_autogen_unsafe.go new file mode 100644 index 000000000..d5335190c --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2_amd64_abi_autogen_unsafe.go @@ -0,0 +1,16 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build amd64 +// +build amd64 + +package vfs2 + +import ( +) + diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2_amd64_state_autogen.go b/pkg/sentry/syscalls/linux/vfs2/vfs2_amd64_state_autogen.go new file mode 100644 index 000000000..467b4258c --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2_amd64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build amd64 +// +build amd64 + +package vfs2 diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2_arm64_abi_autogen_unsafe.go b/pkg/sentry/syscalls/linux/vfs2/vfs2_arm64_abi_autogen_unsafe.go new file mode 100644 index 000000000..07ab07d17 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2_arm64_abi_autogen_unsafe.go @@ -0,0 +1,16 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +// If there are issues with build constraint aggregation, see +// tools/go_marshal/gomarshal/generator.go:writeHeader(). The constraints here +// come from the input set of files used to generate this file. This input set +// is filtered based on pre-defined file suffixes related to build constraints, +// see tools/defs.bzl:calculate_sets(). + +//go:build arm64 +// +build arm64 + +package vfs2 + +import ( +) + diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2_arm64_state_autogen.go b/pkg/sentry/syscalls/linux/vfs2/vfs2_arm64_state_autogen.go new file mode 100644 index 000000000..2e1a50af5 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2_arm64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 +// +build arm64 + +package vfs2 diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2_state_autogen.go b/pkg/sentry/syscalls/linux/vfs2/vfs2_state_autogen.go new file mode 100644 index 000000000..d02c8467f --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2_state_autogen.go @@ -0,0 +1,42 @@ +// automatically generated by stateify. + +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (p *pollRestartBlock) StateTypeName() string { + return "pkg/sentry/syscalls/linux/vfs2.pollRestartBlock" +} + +func (p *pollRestartBlock) StateFields() []string { + return []string{ + "pfdAddr", + "nfds", + "timeout", + } +} + +func (p *pollRestartBlock) beforeSave() {} + +// +checklocksignore +func (p *pollRestartBlock) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.pfdAddr) + stateSinkObject.Save(1, &p.nfds) + stateSinkObject.Save(2, &p.timeout) +} + +func (p *pollRestartBlock) afterLoad() {} + +// +checklocksignore +func (p *pollRestartBlock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.pfdAddr) + stateSourceObject.Load(1, &p.nfds) + stateSourceObject.Load(2, &p.timeout) +} + +func init() { + state.Register((*pollRestartBlock)(nil)) +} diff --git a/pkg/sentry/syscalls/syscalls_state_autogen.go b/pkg/sentry/syscalls/syscalls_state_autogen.go new file mode 100644 index 000000000..b577e39a3 --- /dev/null +++ b/pkg/sentry/syscalls/syscalls_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package syscalls diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD deleted file mode 100644 index 36d999c47..000000000 --- a/pkg/sentry/time/BUILD +++ /dev/null @@ -1,55 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "seqatomic_parameters", - out = "seqatomic_parameters_unsafe.go", - package = "time", - suffix = "Parameters", - template = "//pkg/sync/seqatomic:generic_seqatomic", - types = { - "Value": "Parameters", - }, -) - -go_library( - name = "time", - srcs = [ - "arith_arm64.go", - "calibrated_clock.go", - "clock_id.go", - "clocks.go", - "muldiv_amd64.s", - "muldiv_arm64.s", - "parameters.go", - "sampler.go", - "sampler_amd64.go", - "sampler_arm64.go", - "sampler_unsafe.go", - "seqatomic_parameters_unsafe.go", - "tsc_amd64.s", - "tsc_arm64.s", - ], - visibility = ["//:sandbox"], - deps = [ - "//pkg/errors/linuxerr", - "//pkg/gohacks", - "//pkg/log", - "//pkg/metric", - "//pkg/sync", - "//pkg/syserror", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "time_test", - srcs = [ - "calibrated_clock_test.go", - "parameters_test.go", - "sampler_test.go", - ], - library = ":time", -) diff --git a/pkg/sentry/time/LICENSE b/pkg/sentry/time/LICENSE deleted file mode 100644 index 6a66aea5e..000000000 --- a/pkg/sentry/time/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pkg/sentry/time/calibrated_clock_test.go b/pkg/sentry/time/calibrated_clock_test.go deleted file mode 100644 index 0a4b1f1bf..000000000 --- a/pkg/sentry/time/calibrated_clock_test.go +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package time - -import ( - "testing" - "time" -) - -// newTestCalibratedClock returns a CalibratedClock that collects samples from -// the given sample list and cycle counts from the given cycle list. -func newTestCalibratedClock(samples []sample, cycles []TSCValue) *CalibratedClock { - return &CalibratedClock{ - ref: newTestSampler(samples, cycles), - } -} - -func TestConstantFrequency(t *testing.T) { - // Perfectly constant frequency. - samples := []sample{ - {before: 100000, after: 100000 + defaultOverheadCycles, ref: 100}, - {before: 200000, after: 200000 + defaultOverheadCycles, ref: 200}, - {before: 300000, after: 300000 + defaultOverheadCycles, ref: 300}, - {before: 400000, after: 400000 + defaultOverheadCycles, ref: 400}, - {before: 500000, after: 500000 + defaultOverheadCycles, ref: 500}, - {before: 600000, after: 600000 + defaultOverheadCycles, ref: 600}, - {before: 700000, after: 700000 + defaultOverheadCycles, ref: 700}, - } - - c := newTestCalibratedClock(samples, nil) - - // Update from all samples. - for range samples { - c.Update() - } - - c.mu.RLock() - if !c.ready { - c.mu.RUnlock() - t.Fatalf("clock not ready") - return // For checklocks consistency. - } - // A bit after the last sample. - now, ok := c.params.ComputeTime(750000) - c.mu.RUnlock() - if !ok { - t.Fatalf("ComputeTime ok got %v want true", ok) - } - - t.Logf("now: %v", now) - - // Time should be between the current sample and where we'd expect the - // next sample. - if now < 700 || now > 800 { - t.Errorf("now got %v want > 700 && < 800", now) - } -} - -func TestErrorCorrection(t *testing.T) { - testCases := []struct { - name string - samples [5]sample - projectedTimeStart int64 - projectedTimeEnd int64 - }{ - // Initial calibration should be ~1MHz for each of these, and - // the reference clock changes in samples[2]. - { - name: "slow-down", - samples: [5]sample{ - {before: 1000000, after: 1000001, ref: ReferenceNS(1 * ApproxUpdateInterval.Nanoseconds())}, - {before: 2000000, after: 2000001, ref: ReferenceNS(2 * ApproxUpdateInterval.Nanoseconds())}, - // Reference clock has slowed down, causing 100ms of error. - {before: 3010000, after: 3010001, ref: ReferenceNS(3 * ApproxUpdateInterval.Nanoseconds())}, - {before: 4020000, after: 4020001, ref: ReferenceNS(4 * ApproxUpdateInterval.Nanoseconds())}, - {before: 5030000, after: 5030001, ref: ReferenceNS(5 * ApproxUpdateInterval.Nanoseconds())}, - }, - projectedTimeStart: 3005 * time.Millisecond.Nanoseconds(), - projectedTimeEnd: 3015 * time.Millisecond.Nanoseconds(), - }, - { - name: "speed-up", - samples: [5]sample{ - {before: 1000000, after: 1000001, ref: ReferenceNS(1 * ApproxUpdateInterval.Nanoseconds())}, - {before: 2000000, after: 2000001, ref: ReferenceNS(2 * ApproxUpdateInterval.Nanoseconds())}, - // Reference clock has sped up, causing 100ms of error. - {before: 2990000, after: 2990001, ref: ReferenceNS(3 * ApproxUpdateInterval.Nanoseconds())}, - {before: 3980000, after: 3980001, ref: ReferenceNS(4 * ApproxUpdateInterval.Nanoseconds())}, - {before: 4970000, after: 4970001, ref: ReferenceNS(5 * ApproxUpdateInterval.Nanoseconds())}, - }, - projectedTimeStart: 2985 * time.Millisecond.Nanoseconds(), - projectedTimeEnd: 2995 * time.Millisecond.Nanoseconds(), - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - c := newTestCalibratedClock(tc.samples[:], nil) - - // Initial calibration takes two updates. - _, ok := c.Update() - if ok { - t.Fatalf("Update ready too early") - } - - params, ok := c.Update() - if !ok { - t.Fatalf("Update not ready") - } - - // Initial calibration is ~1MHz. - hz := params.Frequency - if hz < 990000 || hz > 1010000 { - t.Fatalf("Frequency got %v want > 990kHz && < 1010kHz", hz) - } - - // Project time at the next update. Given the 1MHz - // calibration, it is expected to be ~3.1s/2.9s, not - // the actual 3s. - // - // N.B. the next update time is the "after" time above. - projected, ok := params.ComputeTime(tc.samples[2].after) - if !ok { - t.Fatalf("ComputeTime ok got %v want true", ok) - } - if projected < tc.projectedTimeStart || projected > tc.projectedTimeEnd { - t.Fatalf("ComputeTime(%v) got %v want > %v && < %v", tc.samples[2].after, projected, tc.projectedTimeStart, tc.projectedTimeEnd) - } - - // Update again to see the changed reference clock. - params, ok = c.Update() - if !ok { - t.Fatalf("Update not ready") - } - - // We now know that TSC = tc.samples[2].after -> 3s, - // but with the previous params indicated that TSC - // tc.samples[2].after -> 3.5s/2.5s. We can't allow the - // clock to go backwards, and having the clock jump - // forwards is undesirable. There should be a smooth - // transition that corrects the clock error over time. - // Check that the clock is continuous at TSC = - // tc.samples[2].after. - newProjected, ok := params.ComputeTime(tc.samples[2].after) - if !ok { - t.Fatalf("ComputeTime ok got %v want true", ok) - } - if newProjected != projected { - t.Errorf("Discontinuous time; ComputeTime(%v) got %v want %v", tc.samples[2].after, newProjected, projected) - } - - // As the reference clock stablizes, ensure that the clock error - // decreases. - initialErr := c.errorNS - t.Logf("initial error: %v ns", initialErr) - - _, ok = c.Update() - if !ok { - t.Fatalf("Update not ready") - } - if c.errorNS.Magnitude() > initialErr.Magnitude() { - t.Errorf("errorNS increased, got %v want |%v| <= |%v|", c.errorNS, c.errorNS, initialErr) - } - - _, ok = c.Update() - if !ok { - t.Fatalf("Update not ready") - } - if c.errorNS.Magnitude() > initialErr.Magnitude() { - t.Errorf("errorNS increased, got %v want |%v| <= |%v|", c.errorNS, c.errorNS, initialErr) - } - - t.Logf("final error: %v ns", c.errorNS) - }) - } -} diff --git a/pkg/sentry/time/parameters_test.go b/pkg/sentry/time/parameters_test.go deleted file mode 100644 index 0ce1257f6..000000000 --- a/pkg/sentry/time/parameters_test.go +++ /dev/null @@ -1,501 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package time - -import ( - "math" - "testing" - "time" -) - -func TestParametersComputeTime(t *testing.T) { - testCases := []struct { - name string - params Parameters - now TSCValue - want int64 - }{ - { - // Now is the same as the base cycles. - name: "base-cycles", - params: Parameters{ - BaseCycles: 10000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 10000, - want: 5000 * time.Millisecond.Nanoseconds(), - }, - { - // Now is the behind the base cycles. Time is frozen. - name: "backwards", - params: Parameters{ - BaseCycles: 10000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 9000, - want: 5000 * time.Millisecond.Nanoseconds(), - }, - { - // Now is ahead of the base cycles. - name: "ahead", - params: Parameters{ - BaseCycles: 10000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 15000, - want: 5500 * time.Millisecond.Nanoseconds(), - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - got, ok := tc.params.ComputeTime(tc.now) - if !ok { - t.Errorf("ComputeTime ok got %v want true", got) - } - if got != tc.want { - t.Errorf("ComputeTime got %+v want %+v", got, tc.want) - } - }) - } -} - -func TestParametersErrorAdjust(t *testing.T) { - testCases := []struct { - name string - oldParams Parameters - now TSCValue - newParams Parameters - want Parameters - errorNS ReferenceNS - wantErr bool - }{ - { - // newParams are perfectly continuous with oldParams - // and don't need adjustment. - name: "continuous", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 50000, - newParams: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - want: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - }, - { - // Same as "continuous", but with now ahead of - // newParams.BaseCycles. The result is the same as - // there is no error to correct. - name: "continuous-nowdiff", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 60000, - newParams: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - want: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - }, - { - // errorAdjust bails out if the TSC goes backwards. - name: "tsc-backwards", - oldParams: Parameters{ - BaseCycles: 10000, - BaseRef: ReferenceNS(1000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 9000, - newParams: Parameters{ - BaseCycles: 9000, - BaseRef: ReferenceNS(1100 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - wantErr: true, - }, - { - // errorAdjust bails out if new params are from after now. - name: "params-after-now", - oldParams: Parameters{ - BaseCycles: 10000, - BaseRef: ReferenceNS(1000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 11000, - newParams: Parameters{ - BaseCycles: 12000, - BaseRef: ReferenceNS(1200 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - wantErr: true, - }, - { - // Host clock sped up. - name: "speed-up", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 45000, - // Host frequency changed to 9000 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 45000, - // From oldParams, we think ref = 4.5s at cycles = 45000. - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 9000, - }, - want: Parameters{ - BaseCycles: 45000, - BaseRef: ReferenceNS(4500 * time.Millisecond.Nanoseconds()), - // We must decrease the new frequency by 50% to - // correct 0.5s of error in 1s - // (ApproxUpdateInterval). - Frequency: 4500, - }, - errorNS: ReferenceNS(-500 * time.Millisecond.Nanoseconds()), - }, - { - // Host clock sped up, with now ahead of newParams. - name: "speed-up-nowdiff", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 50000, - // Host frequency changed to 9000 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 45000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 9000, - }, - // nextRef = 6000ms - // nextCycles = 9000 * (6000ms - 5000ms) + 45000 - // nextCycles = 9000 * (1s) + 45000 - // nextCycles = 54000 - // f = (54000 - 50000) / 1s = 4000 - // - // ref = 5000ms - (50000 - 45000) / 4000 - // ref = 3.75s - want: Parameters{ - BaseCycles: 45000, - BaseRef: ReferenceNS(3750 * time.Millisecond.Nanoseconds()), - Frequency: 4000, - }, - // oldNow = 50000 * 10000 = 5s - // newNow = (50000 - 45000) / 9000 + 5s = 5.555s - errorNS: ReferenceNS((5000*time.Millisecond - 5555555555).Nanoseconds()), - }, - { - // Host clock sped up. The new parameters are so far - // ahead that the next update time already passed. - name: "speed-up-uncorrectable-baseref", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 50000, - // Host frequency changed to 5000 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 45000, - BaseRef: ReferenceNS(9000 * time.Millisecond.Nanoseconds()), - Frequency: 5000, - }, - // The next update should be at 10s, but newParams - // already passed 6s. Thus it is impossible to correct - // the clock by then. - wantErr: true, - }, - { - // Host clock sped up. The new parameters are moving so - // fast that the next update should be before now. - name: "speed-up-uncorrectable-frequency", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 55000, - // Host frequency changed to 7500 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 45000, - BaseRef: ReferenceNS(6000 * time.Millisecond.Nanoseconds()), - Frequency: 7500, - }, - // The next update should be at 6.5s, but newParams are - // so far ahead and fast that they reach 6.5s at cycle - // 48750, which before now! Thus it is impossible to - // correct the clock by then. - wantErr: true, - }, - { - // Host clock slowed down. - name: "slow-down", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 55000, - // Host frequency changed to 11000 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 55000, - // From oldParams, we think ref = 5.5s at cycles = 55000. - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 11000, - }, - want: Parameters{ - BaseCycles: 55000, - BaseRef: ReferenceNS(5500 * time.Millisecond.Nanoseconds()), - // We must increase the new frequency by 50% to - // correct 0.5s of error in 1s - // (ApproxUpdateInterval). - Frequency: 16500, - }, - errorNS: ReferenceNS(500 * time.Millisecond.Nanoseconds()), - }, - { - // Host clock slowed down, with now ahead of newParams. - name: "slow-down-nowdiff", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 60000, - // Host frequency changed to 11000 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 55000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 11000, - }, - // nextRef = 7000ms - // nextCycles = 11000 * (7000ms - 5000ms) + 55000 - // nextCycles = 11000 * (2000ms) + 55000 - // nextCycles = 77000 - // f = (77000 - 60000) / 1s = 17000 - // - // ref = 6000ms - (60000 - 55000) / 17000 - // ref = 5705882353ns - want: Parameters{ - BaseCycles: 55000, - BaseRef: ReferenceNS(5705882353), - Frequency: 17000, - }, - // oldNow = 60000 * 10000 = 6s - // newNow = (60000 - 55000) / 11000 + 5s = 5.4545s - errorNS: ReferenceNS((6*time.Second - 5454545454).Nanoseconds()), - }, - { - // Host time went backwards. - name: "time-backwards", - oldParams: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 60000, - newParams: Parameters{ - BaseCycles: 60000, - // From oldParams, we think ref = 6s at cycles = 60000. - BaseRef: ReferenceNS(4000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - want: Parameters{ - BaseCycles: 60000, - BaseRef: ReferenceNS(6000 * time.Millisecond.Nanoseconds()), - // We must increase the frequency by 200% to - // correct 2s of error in 1s - // (ApproxUpdateInterval). - Frequency: 30000, - }, - errorNS: ReferenceNS(2000 * time.Millisecond.Nanoseconds()), - }, - { - // Host time went backwards, with now ahead of newParams. - name: "time-backwards-nowdiff", - oldParams: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 65000, - // nextRef = 7500ms - // nextCycles = 10000 * (7500ms - 4000ms) + 60000 - // nextCycles = 10000 * (3500ms) + 60000 - // nextCycles = 95000 - // f = (95000 - 65000) / 1s = 30000 - // - // ref = 6500ms - (65000 - 60000) / 30000 - // ref = 6333333333ns - newParams: Parameters{ - BaseCycles: 60000, - BaseRef: ReferenceNS(4000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - want: Parameters{ - BaseCycles: 60000, - BaseRef: ReferenceNS(6333333334), - Frequency: 30000, - }, - // oldNow = 65000 * 10000 = 6.5s - // newNow = (65000 - 60000) / 10000 + 4s = 4.5s - errorNS: ReferenceNS(2000 * time.Millisecond.Nanoseconds()), - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - got, errorNS, err := errorAdjust(tc.oldParams, tc.newParams, tc.now) - if err != nil && !tc.wantErr { - t.Errorf("err got %v want nil", err) - } else if err == nil && tc.wantErr { - t.Errorf("err got nil want non-nil") - } - - if got != tc.want { - t.Errorf("Parameters got %+v want %+v", got, tc.want) - } - if errorNS != tc.errorNS { - t.Errorf("errorNS got %v want %v", errorNS, tc.errorNS) - } - }) - } -} - -func testMuldiv(t *testing.T, v uint64) { - for i := uint64(1); i <= 1000000; i++ { - mult := uint64(1000000000) - div := i * mult - res, ok := muldiv64(v, mult, div) - if !ok { - t.Errorf("Result of %v * %v / %v ok got false want true", v, mult, div) - } - if want := v / i; res != want { - t.Errorf("Bad result of %v * %v / %v: got %v, want %v", v, mult, div, res, want) - } - } -} - -func TestMulDiv(t *testing.T) { - testMuldiv(t, math.MaxUint64) - for i := int64(-10); i <= 10; i++ { - testMuldiv(t, uint64(i)) - } -} - -func TestMulDivZero(t *testing.T) { - if r, ok := muldiv64(2, 4, 0); ok { - t.Errorf("muldiv64(2, 4, 0) got %d, ok want !ok", r) - } - - if r, ok := muldiv64(0, 0, 0); ok { - t.Errorf("muldiv64(0, 0, 0) got %d, ok want !ok", r) - } -} - -func TestMulDivOverflow(t *testing.T) { - testCases := []struct { - name string - val uint64 - mult uint64 - div uint64 - ok bool - ret uint64 - }{ - { - name: "2^62", - val: 1 << 63, - mult: 4, - div: 8, - ok: true, - ret: 1 << 62, - }, - { - name: "2^64-1", - val: 0xffffffffffffffff, - mult: 1, - div: 1, - ok: true, - ret: 0xffffffffffffffff, - }, - { - name: "2^64", - val: 1 << 63, - mult: 4, - div: 2, - ok: false, - }, - { - name: "2^125", - val: 1 << 63, - mult: 1 << 63, - div: 2, - ok: false, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - r, ok := muldiv64(tc.val, tc.mult, tc.div) - if ok != tc.ok { - t.Errorf("ok got %v want %v", ok, tc.ok) - } - if tc.ok && r != tc.ret { - t.Errorf("ret got %v want %v", r, tc.ret) - } - }) - } -} - -func BenchmarkMuldiv64(b *testing.B) { - var v uint64 = math.MaxUint64 - for i := uint64(1); i <= 1000000; i++ { - mult := uint64(1000000000) - div := i * mult - res, ok := muldiv64(v, mult, div) - if !ok { - b.Errorf("Result of %v * %v / %v ok got false want true", v, mult, div) - } - if want := v / i; res != want { - b.Errorf("Bad result of %v * %v / %v: got %v, want %v", v, mult, div, res, want) - } - } -} diff --git a/pkg/sentry/time/sampler_test.go b/pkg/sentry/time/sampler_test.go deleted file mode 100644 index 3e70a1134..000000000 --- a/pkg/sentry/time/sampler_test.go +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package time - -import ( - "errors" - "testing" -) - -// errNoSamples is returned when testReferenceClocks runs out of samples. -var errNoSamples = errors.New("no samples available") - -// testReferenceClocks returns a preset list of samples and cycle counts. -type testReferenceClocks struct { - samples []sample - cycles []TSCValue -} - -// Sample implements referenceClocks.Sample, returning the next sample in the list. -func (t *testReferenceClocks) Sample(_ ClockID) (sample, error) { - if len(t.samples) == 0 { - return sample{}, errNoSamples - } - - s := t.samples[0] - if len(t.samples) == 1 { - t.samples = nil - } else { - t.samples = t.samples[1:] - } - - return s, nil -} - -// Cycles implements referenceClocks.Cycles, returning the next TSCValue in the list. -func (t *testReferenceClocks) Cycles() TSCValue { - if len(t.cycles) == 0 { - return 0 - } - - c := t.cycles[0] - if len(t.cycles) == 1 { - t.cycles = nil - } else { - t.cycles = t.cycles[1:] - } - - return c -} - -// newTestSampler returns a sampler that collects samples from -// the given sample list and cycle counts from the given cycle list. -func newTestSampler(samples []sample, cycles []TSCValue) *sampler { - return &sampler{ - clocks: &testReferenceClocks{ - samples: samples, - cycles: cycles, - }, - overhead: defaultOverheadCycles, - } -} - -// generateSamples generates n samples with the given overhead. -func generateSamples(n int, overhead TSCValue) []sample { - samples := []sample{{before: 1000000, after: 1000000 + overhead, ref: 100}} - for i := 0; i < n-1; i++ { - prev := samples[len(samples)-1] - samples = append(samples, sample{ - before: prev.before + 1000000, - after: prev.after + 1000000, - ref: prev.ref + 100, - }) - } - return samples -} - -// TestSample ensures that samples can be collected. -func TestSample(t *testing.T) { - testCases := []struct { - name string - samples []sample - err error - }{ - { - name: "basic", - samples: []sample{ - {before: 100000, after: 100000 + defaultOverheadCycles, ref: 100}, - }, - err: nil, - }, - { - // Sample with backwards TSC ignored. - // referenceClock should retry and get errNoSamples. - name: "backwards-tsc-ignored", - samples: []sample{ - {before: 100000, after: 90000, ref: 100}, - }, - err: errNoSamples, - }, - { - // Sample far above overhead skipped. - // referenceClock should retry and get errNoSamples. - name: "reject-overhead", - samples: []sample{ - {before: 100000, after: 100000 + 5*defaultOverheadCycles, ref: 100}, - }, - err: errNoSamples, - }, - { - // Maximum overhead allowed is bounded. - name: "over-max-overhead", - // Generate a bunch of samples. The reference clock - // needs a while to ramp up its expected overhead. - samples: generateSamples(100, 2*maxOverheadCycles), - err: errOverheadTooHigh, - }, - { - // Overhead at maximum overhead is allowed. - name: "max-overhead", - // Generate a bunch of samples. The reference clock - // needs a while to ramp up its expected overhead. - samples: generateSamples(100, maxOverheadCycles), - err: nil, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - s := newTestSampler(tc.samples, nil) - err := s.Sample() - if err != tc.err { - t.Errorf("Sample err got %v want %v", err, tc.err) - } - }) - } -} - -// TestOutliersIgnored tests that referenceClock ignores samples with very high -// overhead. -func TestOutliersIgnored(t *testing.T) { - s := newTestSampler([]sample{ - {before: 100000, after: 100000 + defaultOverheadCycles, ref: 100}, - {before: 200000, after: 200000 + defaultOverheadCycles, ref: 200}, - {before: 300000, after: 300000 + defaultOverheadCycles, ref: 300}, - {before: 400000, after: 400000 + defaultOverheadCycles, ref: 400}, - {before: 500000, after: 500000 + 5*defaultOverheadCycles, ref: 500}, // Ignored - {before: 600000, after: 600000 + defaultOverheadCycles, ref: 600}, - {before: 700000, after: 700000 + defaultOverheadCycles, ref: 700}, - }, nil) - - // Collect 5 samples. - for i := 0; i < 5; i++ { - err := s.Sample() - if err != nil { - t.Fatalf("Unexpected error while sampling: %v", err) - } - } - - oldest, newest, ok := s.Range() - if !ok { - t.Fatalf("Range not ok") - } - - if oldest.ref != 100 { - t.Errorf("oldest.ref got %v want %v", oldest.ref, 100) - } - - // We skipped the high-overhead sample. - if newest.ref != 600 { - t.Errorf("newest.ref got %v want %v", newest.ref, 600) - } -} diff --git a/pkg/sentry/time/seqatomic_parameters_unsafe.go b/pkg/sentry/time/seqatomic_parameters_unsafe.go new file mode 100644 index 000000000..357e476ec --- /dev/null +++ b/pkg/sentry/time/seqatomic_parameters_unsafe.go @@ -0,0 +1,38 @@ +package time + +import ( + "unsafe" + + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/sync" +) + +// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race +// with any writer critical sections in seq. +// +//go:nosplit +func SeqAtomicLoadParameters(seq *sync.SeqCount, ptr *Parameters) Parameters { + for { + if val, ok := SeqAtomicTryLoadParameters(seq, seq.BeginRead(), ptr); ok { + return val + } + } +} + +// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section +// in seq initiated by a call to seq.BeginRead() that returned epoch. If the +// read would race with a writer critical section, SeqAtomicTryLoad returns +// (unspecified, false). +// +//go:nosplit +func SeqAtomicTryLoadParameters(seq *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *Parameters) (val Parameters, ok bool) { + if sync.RaceEnabled { + + gohacks.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + + val = *ptr + } + ok = seq.ReadOk(epoch) + return +} diff --git a/pkg/sentry/time/time_amd64_state_autogen.go b/pkg/sentry/time/time_amd64_state_autogen.go new file mode 100644 index 000000000..3306b8b88 --- /dev/null +++ b/pkg/sentry/time/time_amd64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build amd64 +// +build amd64 + +package time diff --git a/pkg/sentry/time/time_arm64_state_autogen.go b/pkg/sentry/time/time_arm64_state_autogen.go new file mode 100644 index 000000000..233d240c4 --- /dev/null +++ b/pkg/sentry/time/time_arm64_state_autogen.go @@ -0,0 +1,6 @@ +// automatically generated by stateify. + +//go:build arm64 +// +build arm64 + +package time diff --git a/pkg/sentry/time/time_state_autogen.go b/pkg/sentry/time/time_state_autogen.go new file mode 100644 index 000000000..2adc9c9e0 --- /dev/null +++ b/pkg/sentry/time/time_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package time diff --git a/pkg/sentry/time/time_unsafe_state_autogen.go b/pkg/sentry/time/time_unsafe_state_autogen.go new file mode 100644 index 000000000..2adc9c9e0 --- /dev/null +++ b/pkg/sentry/time/time_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package time diff --git a/pkg/sentry/unimpl/BUILD b/pkg/sentry/unimpl/BUILD deleted file mode 100644 index 5d4aa3a63..000000000 --- a/pkg/sentry/unimpl/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_library", "proto_library") - -package(licenses = ["notice"]) - -proto_library( - name = "unimplemented_syscall", - srcs = ["unimplemented_syscall.proto"], - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_proto"], -) - -go_library( - name = "unimpl", - srcs = ["events.go"], - visibility = ["//:sandbox"], - deps = [ - "//pkg/context", - "//pkg/log", - ], -) diff --git a/pkg/sentry/unimpl/unimpl_state_autogen.go b/pkg/sentry/unimpl/unimpl_state_autogen.go new file mode 100644 index 000000000..b37d16f87 --- /dev/null +++ b/pkg/sentry/unimpl/unimpl_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package unimpl diff --git a/pkg/sentry/unimpl/unimplemented_syscall.proto b/pkg/sentry/unimpl/unimplemented_syscall.proto deleted file mode 100644 index 0d7a94be7..000000000 --- a/pkg/sentry/unimpl/unimplemented_syscall.proto +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -import "pkg/sentry/arch/registers.proto"; - -message UnimplementedSyscall { - // Task ID. - int32 tid = 1; - - // Registers at the time of the call. - Registers registers = 2; -} diff --git a/pkg/sentry/unimpl/unimplemented_syscall_go_proto/unimplemented_syscall.pb.go b/pkg/sentry/unimpl/unimplemented_syscall_go_proto/unimplemented_syscall.pb.go new file mode 100644 index 000000000..aa85c330a --- /dev/null +++ b/pkg/sentry/unimpl/unimplemented_syscall_go_proto/unimplemented_syscall.pb.go @@ -0,0 +1,164 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.25.0 +// protoc v3.13.0 +// source: pkg/sentry/unimpl/unimplemented_syscall.proto + +package gvisor + +import ( + proto "github.com/golang/protobuf/proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + registers_go_proto "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion that a sufficiently up-to-date version +// of the legacy proto package is being used. +const _ = proto.ProtoPackageIsVersion4 + +type UnimplementedSyscall struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Tid int32 `protobuf:"varint,1,opt,name=tid,proto3" json:"tid,omitempty"` + Registers *registers_go_proto.Registers `protobuf:"bytes,2,opt,name=registers,proto3" json:"registers,omitempty"` +} + +func (x *UnimplementedSyscall) Reset() { + *x = UnimplementedSyscall{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_sentry_unimpl_unimplemented_syscall_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *UnimplementedSyscall) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UnimplementedSyscall) ProtoMessage() {} + +func (x *UnimplementedSyscall) ProtoReflect() protoreflect.Message { + mi := &file_pkg_sentry_unimpl_unimplemented_syscall_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UnimplementedSyscall.ProtoReflect.Descriptor instead. +func (*UnimplementedSyscall) Descriptor() ([]byte, []int) { + return file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDescGZIP(), []int{0} +} + +func (x *UnimplementedSyscall) GetTid() int32 { + if x != nil { + return x.Tid + } + return 0 +} + +func (x *UnimplementedSyscall) GetRegisters() *registers_go_proto.Registers { + if x != nil { + return x.Registers + } + return nil +} + +var File_pkg_sentry_unimpl_unimplemented_syscall_proto protoreflect.FileDescriptor + +var file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDesc = []byte{ + 0x0a, 0x2d, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x2f, 0x75, 0x6e, 0x69, + 0x6d, 0x70, 0x6c, 0x2f, 0x75, 0x6e, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x65, + 0x64, 0x5f, 0x73, 0x79, 0x73, 0x63, 0x61, 0x6c, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, + 0x06, 0x67, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x1a, 0x1f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x6e, + 0x74, 0x72, 0x79, 0x2f, 0x61, 0x72, 0x63, 0x68, 0x2f, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, + 0x72, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x59, 0x0a, 0x14, 0x55, 0x6e, 0x69, 0x6d, + 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x65, 0x64, 0x53, 0x79, 0x73, 0x63, 0x61, 0x6c, 0x6c, + 0x12, 0x10, 0x0a, 0x03, 0x74, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x74, + 0x69, 0x64, 0x12, 0x2f, 0x0a, 0x09, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x67, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x52, + 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x73, 0x52, 0x09, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, + 0x65, 0x72, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDescOnce sync.Once + file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDescData = file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDesc +) + +func file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDescGZIP() []byte { + file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDescOnce.Do(func() { + file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDescData) + }) + return file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDescData +} + +var file_pkg_sentry_unimpl_unimplemented_syscall_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_pkg_sentry_unimpl_unimplemented_syscall_proto_goTypes = []interface{}{ + (*UnimplementedSyscall)(nil), // 0: gvisor.UnimplementedSyscall + (*registers_go_proto.Registers)(nil), // 1: gvisor.Registers +} +var file_pkg_sentry_unimpl_unimplemented_syscall_proto_depIdxs = []int32{ + 1, // 0: gvisor.UnimplementedSyscall.registers:type_name -> gvisor.Registers + 1, // [1:1] is the sub-list for method output_type + 1, // [1:1] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_pkg_sentry_unimpl_unimplemented_syscall_proto_init() } +func file_pkg_sentry_unimpl_unimplemented_syscall_proto_init() { + if File_pkg_sentry_unimpl_unimplemented_syscall_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_pkg_sentry_unimpl_unimplemented_syscall_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*UnimplementedSyscall); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_pkg_sentry_unimpl_unimplemented_syscall_proto_goTypes, + DependencyIndexes: file_pkg_sentry_unimpl_unimplemented_syscall_proto_depIdxs, + MessageInfos: file_pkg_sentry_unimpl_unimplemented_syscall_proto_msgTypes, + }.Build() + File_pkg_sentry_unimpl_unimplemented_syscall_proto = out.File + file_pkg_sentry_unimpl_unimplemented_syscall_proto_rawDesc = nil + file_pkg_sentry_unimpl_unimplemented_syscall_proto_goTypes = nil + file_pkg_sentry_unimpl_unimplemented_syscall_proto_depIdxs = nil +} diff --git a/pkg/sentry/uniqueid/BUILD b/pkg/sentry/uniqueid/BUILD deleted file mode 100644 index 7467e6398..000000000 --- a/pkg/sentry/uniqueid/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "uniqueid", - srcs = ["context.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/sentry/socket/unix/transport", - ], -) diff --git a/pkg/sentry/uniqueid/uniqueid_state_autogen.go b/pkg/sentry/uniqueid/uniqueid_state_autogen.go new file mode 100644 index 000000000..1890fdf46 --- /dev/null +++ b/pkg/sentry/uniqueid/uniqueid_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package uniqueid diff --git a/pkg/sentry/usage/BUILD b/pkg/sentry/usage/BUILD deleted file mode 100644 index 8e2b3ed79..000000000 --- a/pkg/sentry/usage/BUILD +++ /dev/null @@ -1,23 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "usage", - srcs = [ - "cpu.go", - "io.go", - "memory.go", - "memory_unsafe.go", - "usage.go", - ], - visibility = [ - "//:sandbox", - ], - deps = [ - "//pkg/bits", - "//pkg/memutil", - "//pkg/sync", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/usage/usage_state_autogen.go b/pkg/sentry/usage/usage_state_autogen.go new file mode 100644 index 000000000..a0077a6ca --- /dev/null +++ b/pkg/sentry/usage/usage_state_autogen.go @@ -0,0 +1,86 @@ +// automatically generated by stateify. + +package usage + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *CPUStats) StateTypeName() string { + return "pkg/sentry/usage.CPUStats" +} + +func (s *CPUStats) StateFields() []string { + return []string{ + "UserTime", + "SysTime", + "VoluntarySwitches", + } +} + +func (s *CPUStats) beforeSave() {} + +// +checklocksignore +func (s *CPUStats) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.UserTime) + stateSinkObject.Save(1, &s.SysTime) + stateSinkObject.Save(2, &s.VoluntarySwitches) +} + +func (s *CPUStats) afterLoad() {} + +// +checklocksignore +func (s *CPUStats) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.UserTime) + stateSourceObject.Load(1, &s.SysTime) + stateSourceObject.Load(2, &s.VoluntarySwitches) +} + +func (i *IO) StateTypeName() string { + return "pkg/sentry/usage.IO" +} + +func (i *IO) StateFields() []string { + return []string{ + "CharsRead", + "CharsWritten", + "ReadSyscalls", + "WriteSyscalls", + "BytesRead", + "BytesWritten", + "BytesWriteCancelled", + } +} + +func (i *IO) beforeSave() {} + +// +checklocksignore +func (i *IO) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.CharsRead) + stateSinkObject.Save(1, &i.CharsWritten) + stateSinkObject.Save(2, &i.ReadSyscalls) + stateSinkObject.Save(3, &i.WriteSyscalls) + stateSinkObject.Save(4, &i.BytesRead) + stateSinkObject.Save(5, &i.BytesWritten) + stateSinkObject.Save(6, &i.BytesWriteCancelled) +} + +func (i *IO) afterLoad() {} + +// +checklocksignore +func (i *IO) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.CharsRead) + stateSourceObject.Load(1, &i.CharsWritten) + stateSourceObject.Load(2, &i.ReadSyscalls) + stateSourceObject.Load(3, &i.WriteSyscalls) + stateSourceObject.Load(4, &i.BytesRead) + stateSourceObject.Load(5, &i.BytesWritten) + stateSourceObject.Load(6, &i.BytesWriteCancelled) +} + +func init() { + state.Register((*CPUStats)(nil)) + state.Register((*IO)(nil)) +} diff --git a/pkg/sentry/usage/usage_unsafe_state_autogen.go b/pkg/sentry/usage/usage_unsafe_state_autogen.go new file mode 100644 index 000000000..5d9e86efa --- /dev/null +++ b/pkg/sentry/usage/usage_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package usage diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD deleted file mode 100644 index a2032162d..000000000 --- a/pkg/sentry/vfs/BUILD +++ /dev/null @@ -1,143 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -licenses(["notice"]) - -go_template_instance( - name = "epoll_interest_list", - out = "epoll_interest_list.go", - package = "vfs", - prefix = "epollInterest", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*epollInterest", - "Linker": "*epollInterest", - }, -) - -go_template_instance( - name = "event_list", - out = "event_list.go", - package = "vfs", - prefix = "event", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Event", - "Linker": "*Event", - }, -) - -go_template_instance( - name = "file_description_refs", - out = "file_description_refs.go", - package = "vfs", - prefix = "FileDescription", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "FileDescription", - }, -) - -go_template_instance( - name = "mount_namespace_refs", - out = "mount_namespace_refs.go", - package = "vfs", - prefix = "MountNamespace", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "MountNamespace", - }, -) - -go_template_instance( - name = "filesystem_refs", - out = "filesystem_refs.go", - package = "vfs", - prefix = "Filesystem", - template = "//pkg/refsvfs2:refs_template", - types = { - "T": "Filesystem", - }, -) - -go_library( - name = "vfs", - srcs = [ - "anonfs.go", - "context.go", - "debug.go", - "dentry.go", - "device.go", - "epoll.go", - "epoll_interest_list.go", - "event_list.go", - "file_description.go", - "file_description_impl_util.go", - "file_description_refs.go", - "filesystem.go", - "filesystem_impl_util.go", - "filesystem_refs.go", - "filesystem_type.go", - "inotify.go", - "lock.go", - "mount.go", - "mount_namespace_refs.go", - "mount_unsafe.go", - "opath.go", - "options.go", - "pathname.go", - "permissions.go", - "resolving_path.go", - "save_restore.go", - "vfs.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/fspath", - "//pkg/gohacks", - "//pkg/hostarch", - "//pkg/log", - "//pkg/refs", - "//pkg/refsvfs2", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/fs", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fsmetric", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/uniqueid", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -go_test( - name = "vfs_test", - size = "small", - srcs = [ - "file_description_impl_util_test.go", - "mount_test.go", - ], - library = ":vfs", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/errors/linuxerr", - "//pkg/sentry/contexttest", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md deleted file mode 100644 index 5aad31b78..000000000 --- a/pkg/sentry/vfs/README.md +++ /dev/null @@ -1,186 +0,0 @@ -# The gVisor Virtual Filesystem - -THIS PACKAGE IS CURRENTLY EXPERIMENTAL AND NOT READY OR ENABLED FOR PRODUCTION -USE. For the filesystem implementation currently used by gVisor, see the `fs` -package. - -## Implementation Notes - -### Reference Counting - -Filesystem, Dentry, Mount, MountNamespace, and FileDescription are all -reference-counted. Mount and MountNamespace are exclusively VFS-managed; when -their reference count reaches zero, VFS releases their resources. Filesystem and -FileDescription management is shared between VFS and filesystem implementations; -when their reference count reaches zero, VFS notifies the implementation by -calling `FilesystemImpl.Release()` or `FileDescriptionImpl.Release()` -respectively and then releases VFS-owned resources. Dentries are exclusively -managed by filesystem implementations; reference count changes are abstracted -through DentryImpl, which should release resources when reference count reaches -zero. - -Filesystem references are held by: - -- Mount: Each referenced Mount holds a reference on the mounted Filesystem. - -Dentry references are held by: - -- FileDescription: Each referenced FileDescription holds a reference on the - Dentry through which it was opened, via `FileDescription.vd.dentry`. - -- Mount: Each referenced Mount holds a reference on its mount point and on the - mounted filesystem root. The mount point is mutable (`mount(MS_MOVE)`). - -Mount references are held by: - -- FileDescription: Each referenced FileDescription holds a reference on the - Mount on which it was opened, via `FileDescription.vd.mount`. - -- Mount: Each referenced Mount holds a reference on its parent, which is the - mount containing its mount point. - -- VirtualFilesystem: A reference is held on each Mount that has been connected - to a mount point, but not yet umounted. - -MountNamespace and FileDescription references are held by users of VFS. The -expectation is that each `kernel.Task` holds a reference on its corresponding -MountNamespace, and each file descriptor holds a reference on its represented -FileDescription. - -Notes: - -- Dentries do not hold a reference on their owning Filesystem. Instead, all - uses of a Dentry occur in the context of a Mount, which holds a reference on - the relevant Filesystem (see e.g. the VirtualDentry type). As a corollary, - when releasing references on both a Dentry and its corresponding Mount, the - Dentry's reference must be released first (because releasing the Mount's - reference may release the last reference on the Filesystem, whose state may - be required to release the Dentry reference). - -### The Inheritance Pattern - -Filesystem, Dentry, and FileDescription are all concepts featuring both state -that must be shared between VFS and filesystem implementations, and operations -that are implementation-defined. To facilitate this, each of these three -concepts follows the same pattern, shown below for Dentry: - -```go -// Dentry represents a node in a filesystem tree. -type Dentry struct { - // VFS-required dentry state. - parent *Dentry - // ... - - // impl is the DentryImpl associated with this Dentry. impl is immutable. - // This should be the last field in Dentry. - impl DentryImpl -} - -// Init must be called before first use of d. -func (d *Dentry) Init(impl DentryImpl) { - d.impl = impl -} - -// Impl returns the DentryImpl associated with d. -func (d *Dentry) Impl() DentryImpl { - return d.impl -} - -// DentryImpl contains implementation-specific details of a Dentry. -// Implementations of DentryImpl should contain their associated Dentry by -// value as their first field. -type DentryImpl interface { - // VFS-required implementation-defined dentry operations. - IncRef() - // ... -} -``` - -This construction, which is essentially a type-safe analogue to Linux's -`container_of` pattern, has the following properties: - -- VFS works almost exclusively with pointers to Dentry rather than DentryImpl - interface objects, such as in the type of `Dentry.parent`. This avoids - interface method calls (which are somewhat expensive to perform, and defeat - inlining and escape analysis), reduces the size of VFS types (since an - interface object is two pointers in size), and allows pointers to be loaded - and stored atomically using `sync/atomic`. Implementation-defined behavior - is accessed via `Dentry.impl` when required. - -- Filesystem implementations can access the implementation-defined state - associated with objects of VFS types by type-asserting or type-switching - (e.g. `Dentry.Impl().(*myDentry)`). Type assertions to a concrete type - require only an equality comparison of the interface object's type pointer - to a static constant, and are consequently very fast. - -- Filesystem implementations can access the VFS state associated with objects - of implementation-defined types directly. - -- VFS and implementation-defined state for a given type occupy the same - object, minimizing memory allocations and maximizing memory locality. `impl` - is the last field in `Dentry`, and `Dentry` is the first field in - `DentryImpl` implementations, for similar reasons: this tends to cause - fetching of the `Dentry.impl` interface object to also fetch `DentryImpl` - fields, either because they are in the same cache line or via next-line - prefetching. - -## Future Work - -- Most `mount(2)` features, and unmounting, are incomplete. - -- VFS1 filesystems are not directly compatible with VFS2. It may be possible - to implement shims that implement `vfs.FilesystemImpl` for - `fs.MountNamespace`, `vfs.DentryImpl` for `fs.Dirent`, and - `vfs.FileDescriptionImpl` for `fs.File`, which may be adequate for - filesystems that are not performance-critical (e.g. sysfs); however, it is - not clear that this will be less effort than simply porting the filesystems - in question. Practically speaking, the following filesystems will probably - need to be ported or made compatible through a shim to evaluate filesystem - performance on realistic workloads: - - - devfs/procfs/sysfs, which will realistically be necessary to execute - most applications. (Note that procfs and sysfs do not support hard - links, so they do not require the complexity of separate inode objects. - Also note that Linux's /dev is actually a variant of tmpfs called - devtmpfs.) - - - tmpfs. This should be relatively straightforward: copy/paste memfs, - store regular file contents in pgalloc-allocated memory instead of - `[]byte`, and add support for file timestamps. (In fact, it probably - makes more sense to convert memfs to tmpfs and not keep the former.) - - - A remote filesystem, either lisafs (if it is ready by the time that - other benchmarking prerequisites are) or v9fs (aka 9P, aka gofers). - - - epoll files. - - Filesystems that will need to be ported before switching to VFS2, but can - probably be skipped for early testing: - - - overlayfs, which is needed for (at least) synthetic mount points. - - - Support for host ttys. - - - timerfd files. - - Filesystems that can be probably dropped: - - - ashmem, which is far too incomplete to use. - - - binder, which is similarly far too incomplete to use. - -- Save/restore. For instance, it is unclear if the current implementation of - the `state` package supports the inheritance pattern described above. - -- Many features that were previously implemented by VFS must now be - implemented by individual filesystems (though, in most cases, this should - consist of calls to hooks or libraries provided by `vfs` or other packages). - This includes, but is not necessarily limited to: - - - Block and character device special files - - - Inotify - - - File locking - - - `O_ASYNC` diff --git a/pkg/sentry/vfs/epoll_interest_list.go b/pkg/sentry/vfs/epoll_interest_list.go new file mode 100644 index 000000000..e75ea361b --- /dev/null +++ b/pkg/sentry/vfs/epoll_interest_list.go @@ -0,0 +1,221 @@ +package vfs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type epollInterestElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (epollInterestElementMapper) linkerFor(elem *epollInterest) *epollInterest { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type epollInterestList struct { + head *epollInterest + tail *epollInterest +} + +// Reset resets list l to the empty state. +func (l *epollInterestList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *epollInterestList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *epollInterestList) Front() *epollInterest { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *epollInterestList) Back() *epollInterest { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *epollInterestList) Len() (count int) { + for e := l.Front(); e != nil; e = (epollInterestElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *epollInterestList) PushFront(e *epollInterest) { + linker := epollInterestElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + epollInterestElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *epollInterestList) PushBack(e *epollInterest) { + linker := epollInterestElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + epollInterestElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *epollInterestList) PushBackList(m *epollInterestList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + epollInterestElementMapper{}.linkerFor(l.tail).SetNext(m.head) + epollInterestElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *epollInterestList) InsertAfter(b, e *epollInterest) { + bLinker := epollInterestElementMapper{}.linkerFor(b) + eLinker := epollInterestElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + epollInterestElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *epollInterestList) InsertBefore(a, e *epollInterest) { + aLinker := epollInterestElementMapper{}.linkerFor(a) + eLinker := epollInterestElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + epollInterestElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *epollInterestList) Remove(e *epollInterest) { + linker := epollInterestElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + epollInterestElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + epollInterestElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type epollInterestEntry struct { + next *epollInterest + prev *epollInterest +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *epollInterestEntry) Next() *epollInterest { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *epollInterestEntry) Prev() *epollInterest { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *epollInterestEntry) SetNext(elem *epollInterest) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *epollInterestEntry) SetPrev(elem *epollInterest) { + e.prev = elem +} diff --git a/pkg/sentry/vfs/event_list.go b/pkg/sentry/vfs/event_list.go new file mode 100644 index 000000000..c0946b585 --- /dev/null +++ b/pkg/sentry/vfs/event_list.go @@ -0,0 +1,221 @@ +package vfs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type eventElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (eventElementMapper) linkerFor(elem *Event) *Event { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type eventList struct { + head *Event + tail *Event +} + +// Reset resets list l to the empty state. +func (l *eventList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *eventList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *eventList) Front() *Event { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *eventList) Back() *Event { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *eventList) Len() (count int) { + for e := l.Front(); e != nil; e = (eventElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *eventList) PushFront(e *Event) { + linker := eventElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + eventElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *eventList) PushBack(e *Event) { + linker := eventElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + eventElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *eventList) PushBackList(m *eventList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + eventElementMapper{}.linkerFor(l.tail).SetNext(m.head) + eventElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *eventList) InsertAfter(b, e *Event) { + bLinker := eventElementMapper{}.linkerFor(b) + eLinker := eventElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + eventElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *eventList) InsertBefore(a, e *Event) { + aLinker := eventElementMapper{}.linkerFor(a) + eLinker := eventElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + eventElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *eventList) Remove(e *Event) { + linker := eventElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + eventElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + eventElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type eventEntry struct { + next *Event + prev *Event +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *eventEntry) Next() *Event { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *eventEntry) Prev() *Event { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *eventEntry) SetNext(elem *Event) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *eventEntry) SetPrev(elem *Event) { + e.prev = elem +} diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go deleted file mode 100644 index 3423dede1..000000000 --- a/pkg/sentry/vfs/file_description_impl_util_test.go +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "bytes" - "fmt" - "io" - "sync/atomic" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -// fileDescription is the common fd struct which a filesystem implementation -// embeds in all of its file description implementations as required. -type fileDescription struct { - vfsfd FileDescription - FileDescriptionDefaultImpl - NoLockFD -} - -// genCount contains the number of times its DynamicBytesSource.Generate() -// implementation has been called. -type genCount struct { - count uint64 // accessed using atomic memory ops -} - -// Generate implements DynamicBytesSource.Generate. -func (g *genCount) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%d", atomic.AddUint64(&g.count, 1)) - return nil -} - -type storeData struct { - data string -} - -var _ WritableDynamicBytesSource = (*storeData)(nil) - -// Generate implements DynamicBytesSource. -func (d *storeData) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString(d.data) - return nil -} - -// Generate implements WritableDynamicBytesSource. -func (d *storeData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { - buf := make([]byte, src.NumBytes()) - n, err := src.CopyIn(ctx, buf) - if err != nil { - return 0, err - } - - d.data = string(buf[:n]) - return 0, nil -} - -// testFD is a read-only FileDescriptionImpl representing a regular file. -type testFD struct { - fileDescription - DynamicBytesFileDescriptionImpl - - data DynamicBytesSource -} - -func newTestFD(ctx context.Context, vfsObj *VirtualFilesystem, statusFlags uint32, data DynamicBytesSource) *FileDescription { - vd := vfsObj.NewAnonVirtualDentry("genCountFD") - defer vd.DecRef(ctx) - var fd testFD - fd.vfsfd.Init(&fd, statusFlags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{}) - fd.DynamicBytesFileDescriptionImpl.SetDataSource(data) - return &fd.vfsfd -} - -// Release implements FileDescriptionImpl.Release. -func (fd *testFD) Release(context.Context) { -} - -// SetStatusFlags implements FileDescriptionImpl.SetStatusFlags. -// Stat implements FileDescriptionImpl.Stat. -func (fd *testFD) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) { - // Note that Statx.Mask == 0 in the return value. - return linux.Statx{}, nil -} - -// SetStat implements FileDescriptionImpl.SetStat. -func (fd *testFD) SetStat(ctx context.Context, opts SetStatOptions) error { - return linuxerr.EPERM -} - -func TestGenCountFD(t *testing.T) { - ctx := contexttest.Context(t) - - vfsObj := &VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - t.Fatalf("VFS init: %v", err) - } - fd := newTestFD(ctx, vfsObj, linux.O_RDWR, &genCount{}) - defer fd.DecRef(ctx) - - // The first read causes Generate to be called to fill the FD's buffer. - buf := make([]byte, 2) - ioseq := usermem.BytesIOSequence(buf) - n, err := fd.Read(ctx, ioseq, ReadOptions{}) - if n != 1 || (err != nil && err != io.EOF) { - t.Fatalf("first Read: got (%d, %v), wanted (1, nil or EOF)", n, err) - } - if want := byte('1'); buf[0] != want { - t.Errorf("first Read: got byte %c, wanted %c", buf[0], want) - } - - // A second read without seeking is still at EOF. - n, err = fd.Read(ctx, ioseq, ReadOptions{}) - if n != 0 || err != io.EOF { - t.Fatalf("second Read: got (%d, %v), wanted (0, EOF)", n, err) - } - - // Seeking to the beginning of the file causes it to be regenerated. - n, err = fd.Seek(ctx, 0, linux.SEEK_SET) - if n != 0 || err != nil { - t.Fatalf("Seek: got (%d, %v), wanted (0, nil)", n, err) - } - n, err = fd.Read(ctx, ioseq, ReadOptions{}) - if n != 1 || (err != nil && err != io.EOF) { - t.Fatalf("Read after Seek: got (%d, %v), wanted (1, nil or EOF)", n, err) - } - if want := byte('2'); buf[0] != want { - t.Errorf("Read after Seek: got byte %c, wanted %c", buf[0], want) - } - - // PRead at the beginning of the file also causes it to be regenerated. - n, err = fd.PRead(ctx, ioseq, 0, ReadOptions{}) - if n != 1 || (err != nil && err != io.EOF) { - t.Fatalf("PRead: got (%d, %v), wanted (1, nil or EOF)", n, err) - } - if want := byte('3'); buf[0] != want { - t.Errorf("PRead: got byte %c, wanted %c", buf[0], want) - } - - // Write and PWrite fails. - if _, err := fd.Write(ctx, ioseq, WriteOptions{}); !linuxerr.Equals(linuxerr.EIO, err) { - t.Errorf("Write: got err %v, wanted %v", err, syserror.EIO) - } - if _, err := fd.PWrite(ctx, ioseq, 0, WriteOptions{}); !linuxerr.Equals(linuxerr.EIO, err) { - t.Errorf("Write: got err %v, wanted %v", err, syserror.EIO) - } -} - -func TestWritable(t *testing.T) { - ctx := contexttest.Context(t) - - vfsObj := &VirtualFilesystem{} - if err := vfsObj.Init(ctx); err != nil { - t.Fatalf("VFS init: %v", err) - } - fd := newTestFD(ctx, vfsObj, linux.O_RDWR, &storeData{data: "init"}) - defer fd.DecRef(ctx) - - buf := make([]byte, 10) - ioseq := usermem.BytesIOSequence(buf) - if n, err := fd.Read(ctx, ioseq, ReadOptions{}); n != 4 && err != io.EOF { - t.Fatalf("Read: got (%v, %v), wanted (4, EOF)", n, err) - } - if want := "init"; want == string(buf) { - t.Fatalf("Read: got %v, wanted %v", string(buf), want) - } - - // Test PWrite. - want := "write" - writeIOSeq := usermem.BytesIOSequence([]byte(want)) - if n, err := fd.PWrite(ctx, writeIOSeq, 0, WriteOptions{}); int(n) != len(want) && err != nil { - t.Errorf("PWrite: got err (%v, %v), wanted (%v, nil)", n, err, len(want)) - } - if n, err := fd.PRead(ctx, ioseq, 0, ReadOptions{}); int(n) != len(want) && err != io.EOF { - t.Fatalf("PRead: got (%v, %v), wanted (%v, EOF)", n, err, len(want)) - } - if want == string(buf) { - t.Fatalf("PRead: got %v, wanted %v", string(buf), want) - } - - // Test Seek to 0 followed by Write. - want = "write2" - writeIOSeq = usermem.BytesIOSequence([]byte(want)) - if n, err := fd.Seek(ctx, 0, linux.SEEK_SET); n != 0 && err != nil { - t.Errorf("Seek: got err (%v, %v), wanted (0, nil)", n, err) - } - if n, err := fd.Write(ctx, writeIOSeq, WriteOptions{}); int(n) != len(want) && err != nil { - t.Errorf("Write: got err (%v, %v), wanted (%v, nil)", n, err, len(want)) - } - if n, err := fd.PRead(ctx, ioseq, 0, ReadOptions{}); int(n) != len(want) && err != io.EOF { - t.Fatalf("PRead: got (%v, %v), wanted (%v, EOF)", n, err, len(want)) - } - if want == string(buf) { - t.Fatalf("PRead: got %v, wanted %v", string(buf), want) - } - - // Test failure if offset != 0. - if n, err := fd.Seek(ctx, 1, linux.SEEK_SET); n != 0 && err != nil { - t.Errorf("Seek: got err (%v, %v), wanted (0, nil)", n, err) - } - if n, err := fd.Write(ctx, writeIOSeq, WriteOptions{}); n != 0 && !linuxerr.Equals(linuxerr.EINVAL, err) { - t.Errorf("Write: got err (%v, %v), wanted (0, EINVAL)", n, err) - } - if n, err := fd.PWrite(ctx, writeIOSeq, 2, WriteOptions{}); n != 0 && !linuxerr.Equals(linuxerr.EINVAL, err) { - t.Errorf("PWrite: got err (%v, %v), wanted (0, EINVAL)", n, err) - } -} diff --git a/pkg/sentry/vfs/file_description_refs.go b/pkg/sentry/vfs/file_description_refs.go new file mode 100644 index 000000000..9e6b7bd40 --- /dev/null +++ b/pkg/sentry/vfs/file_description_refs.go @@ -0,0 +1,140 @@ +package vfs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const FileDescriptionenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var FileDescriptionobj *FileDescription + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type FileDescriptionRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *FileDescriptionRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *FileDescriptionRefs) RefType() string { + return fmt.Sprintf("%T", FileDescriptionobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *FileDescriptionRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *FileDescriptionRefs) LogRefs() bool { + return FileDescriptionenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *FileDescriptionRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *FileDescriptionRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if FileDescriptionenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *FileDescriptionRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if FileDescriptionenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *FileDescriptionRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if FileDescriptionenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *FileDescriptionRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/vfs/filesystem_refs.go b/pkg/sentry/vfs/filesystem_refs.go new file mode 100644 index 000000000..fc47919d0 --- /dev/null +++ b/pkg/sentry/vfs/filesystem_refs.go @@ -0,0 +1,140 @@ +package vfs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const FilesystemenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var Filesystemobj *Filesystem + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type FilesystemRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *FilesystemRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *FilesystemRefs) RefType() string { + return fmt.Sprintf("%T", Filesystemobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *FilesystemRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *FilesystemRefs) LogRefs() bool { + return FilesystemenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *FilesystemRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *FilesystemRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if FilesystemenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *FilesystemRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if FilesystemenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *FilesystemRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if FilesystemenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *FilesystemRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/vfs/g3doc/inotify.md b/pkg/sentry/vfs/g3doc/inotify.md deleted file mode 100644 index 833db213f..000000000 --- a/pkg/sentry/vfs/g3doc/inotify.md +++ /dev/null @@ -1,210 +0,0 @@ -# Inotify - -Inotify is a mechanism for monitoring filesystem events in Linux--see -inotify(7). An inotify instance can be used to monitor files and directories for -modifications, creation/deletion, etc. The inotify API consists of system calls -that create inotify instances (inotify_init/inotify_init1) and add/remove -watches on files to an instance (inotify_add_watch/inotify_rm_watch). Events are -generated from various places in the sentry, including the syscall layer, the -vfs layer, the process fd table, and within each filesystem implementation. This -document outlines the implementation details of inotify in VFS2. - -## Inotify Objects - -Inotify data structures are implemented in the vfs package. - -### vfs.Inotify - -Inotify instances are represented by vfs.Inotify objects, which implement -vfs.FileDescriptionImpl. As in Linux, inotify fds are backed by a -pseudo-filesystem (anonfs). Each inotify instance receives events from a set of -vfs.Watch objects, which can be modified with inotify_add_watch(2) and -inotify_rm_watch(2). An application can retrieve events by reading the inotify -fd. - -### vfs.Watches - -The set of all watches held on a single file (i.e., the watch target) is stored -in vfs.Watches. Each watch will belong to a different inotify instance (an -instance can only have one watch on any watch target). The watches are stored in -a map indexed by their vfs.Inotify owner’s id. Hard links and file descriptions -to a single file will all share the same vfs.Watches (with the exception of the -gofer filesystem, described in a later section). Activity on the target causes -its vfs.Watches to generate notifications on its watches’ inotify instances. - -### vfs.Watch - -A single watch, owned by one inotify instance and applied to one watch target. -Both the vfs.Inotify owner and vfs.Watches on the target will hold a vfs.Watch, -which leads to some complicated locking behavior (see Lock Ordering). Whenever a -watch is notified of an event on its target, it will queue events to its inotify -instance for delivery to the user. - -### vfs.Event - -vfs.Event is a simple struct encapsulating all the fields for an inotify event. -It is generated by vfs.Watches and forwarded to the watches' owners. It is -serialized to the user during read(2) syscalls on the associated fs.Inotify's -fd. - -## Lock Ordering - -There are three locks related to the inotify implementation: - -Inotify.mu: the inotify instance lock. Inotify.evMu: the inotify event queue -lock. Watches.mu: the watch set lock, used to protect the collection of watches -on a target. - -The correct lock ordering for inotify code is: - -Inotify.mu -> Watches.mu -> Inotify.evMu. - -Note that we use a distinct lock to protect the inotify event queue. If we -simply used Inotify.mu, we could simultaneously have locks being acquired in the -order of Inotify.mu -> Watches.mu and Watches.mu -> Inotify.mu, which would -cause deadlocks. For instance, adding a watch to an inotify instance would -require locking Inotify.mu, and then adding the same watch to the target would -cause Watches.mu to be held. At the same time, generating an event on the target -would require Watches.mu to be held before iterating through each watch, and -then notifying the owner of each watch would cause Inotify.mu to be held. - -See the vfs package comment to understand how inotify locks fit into the overall -ordering of filesystem locks. - -## Watch Targets in Different Filesystem Implementations - -In Linux, watches reside on inodes at the virtual filesystem layer. As a result, -all hard links and file descriptions on a single file will all share the same -watch set. In VFS2, there is no common inode structure across filesystem types -(some may not even have inodes), so we have to plumb inotify support through -each specific filesystem implementation. Some of the technical considerations -are outlined below. - -### Tmpfs - -For filesystems with inodes, like tmpfs, the design is quite similar to that of -Linux, where watches reside on the inode. - -### Pseudo-filesystems - -Technically, because inotify is implemented at the vfs layer in Linux, -pseudo-filesystems on top of kernfs support inotify passively. However, watches -can only track explicit filesystem operations like read/write, open/close, -mknod, etc., so watches on a target like /proc/self/fd will not generate events -every time a new fd is added or removed. As of this writing, we leave inotify -unimplemented in kernfs and anonfs; it does not seem particularly useful. - -### Gofer Filesystem (fsimpl/gofer) - -The gofer filesystem has several traits that make it difficult to support -inotify: - -* **There are no inodes.** A file is represented as a dentry that holds an - unopened p9 file (and possibly an open FID), through which the Sentry - interacts with the gofer. - * *Solution:* Because there is no inode structure stored in the sandbox, - inotify watches must be held on the dentry. For the purposes of inotify, - we assume that every dentry corresponds to a unique inode, which may - cause unexpected behavior in the presence of hard links, where multiple - dentries should share the same set of watches. Indeed, it is impossible - for us to be absolutely sure whether dentries correspond to the same - file or not, due to the following point: -* **The Sentry cannot always be aware of hard links on the remote - filesystem.** There is no way for us to confirm whether two files on the - remote filesystem are actually links to the same inode. QIDs and inodes are - not always 1:1. The assumption that dentries and inodes are 1:1 is - inevitably broken if there are remote hard links that we cannot detect. - * *Solution:* this is an issue with gofer fs in general, not only inotify, - and we will have to live with it. -* **Dentries can be cached, and then evicted.** Dentry lifetime does not - correspond to file lifetime. Because gofer fs is not entirely in-memory, the - absence of a dentry does not mean that the corresponding file does not - exist, nor does a dentry reaching zero references mean that the - corresponding file no longer exists. When a dentry reaches zero references, - it will be cached, in case the file at that path is needed again in the - future. However, the dentry may be evicted from the cache, which will cause - a new dentry to be created next time the same file path is used. The - existing watches will be lost. - * *Solution:* When a dentry reaches zero references, do not cache it if it - has any watches, so we can avoid eviction/destruction. Note that if the - dentry was deleted or invalidated (d.vfsd.IsDead()), we should still - destroy it along with its watches. Additionally, when a dentry’s last - watch is removed, we cache it if it also has zero references. This way, - the dentry can eventually be evicted from memory if it is no longer - needed. -* **Dentries can be invalidated.** Another issue with dentry lifetime is that - the remote file at the file path represented may change from underneath the - dentry. In this case, the next time that the dentry is used, it will be - invalidated and a new dentry will replace it. In this case, it is not clear - what should be done with the watches on the old dentry. - * *Solution:* Silently destroy the watches when invalidation occurs. We - have no way of knowing exactly what happened, when it happens. Inotify - instances on NFS files in Linux probably behave in a similar fashion, - since inotify is implemented at the vfs layer and is not aware of the - complexities of remote file systems. - * An alternative would be to issue some kind of event upon invalidation, - e.g. a delete event, but this has several issues: - * We cannot discern whether the remote file was invalidated because it was - moved, deleted, etc. This information is crucial, because these cases - should result in different events. Furthermore, the watches should only - be destroyed if the file has been deleted. - * Moreover, the mechanism for detecting whether the underlying file has - changed is to check whether a new QID is given by the gofer. This may - result in false positives, e.g. suppose that the server closed and - re-opened the same file, which may result in a new QID. - * Finally, the time of the event may be completely different from the time - of the file modification, since a dentry is not immediately notified - when the underlying file has changed. It would be quite unexpected to - receive the notification when invalidation was triggered, i.e. the next - time the file was accessed within the sandbox, because then the - read/write/etc. operation on the file would not result in the expected - event. - * Another point in favor of the first solution: inotify in Linux can - already be lossy on local filesystems (one of the sacrifices made so - that filesystem performance isn’t killed), and it is lossy on NFS for - similar reasons to gofer fs. Therefore, it is better for inotify to be - silent than to emit incorrect notifications. -* **There may be external users of the remote filesystem.** We can only track - operations performed on the file within the sandbox. This is sufficient - under InteropModeExclusive, but whenever there are external users, the set - of actions we are aware of is incomplete. - * *Solution:* We could either return an error or just issue a warning when - inotify is used without InteropModeExclusive. Although faulty, VFS1 - allows it when the filesystem is shared, and Linux does the same for - remote filesystems (as mentioned above, inotify sits at the vfs level). - -## Dentry Interface - -For events that must be generated above the vfs layer, we provide the following -DentryImpl methods to allow interactions with targets on any FilesystemImpl: - -* **InotifyWithParent()** generates events on the dentry’s watches as well as - its parent’s. -* **Watches()** retrieves the watch set of the target represented by the - dentry. This is used to access and modify watches on a target. -* **OnZeroWatches()** performs cleanup tasks after the last watch is removed - from a dentry. This is needed by gofer fs, which must allow a watched dentry - to be cached once it has no more watches. Most implementations can just do - nothing. Note that OnZeroWatches() must be called after all inotify locks - are released to preserve lock ordering, since it may acquire - FilesystemImpl-specific locks. - -## IN_EXCL_UNLINK - -There are several options that can be set for a watch, specified as part of the -mask in inotify_add_watch(2). In particular, IN_EXCL_UNLINK requires some -additional support in each filesystem. - -A watch with IN_EXCL_UNLINK will not generate events for its target if it -corresponds to a path that was unlinked. For instance, if an fd is opened on -“foo/bar” and “foo/bar” is subsequently unlinked, any reads/writes/etc. on the -fd will be ignored by watches on “foo” or “foo/bar” with IN_EXCL_UNLINK. This -requires each DentryImpl to keep track of whether it has been unlinked, in order -to determine whether events should be sent to watches with IN_EXCL_UNLINK. - -## IN_ONESHOT - -One-shot watches expire after generating a single event. When an event occurs, -all one-shot watches on the target that successfully generated an event are -removed. Lock ordering can cause the management of one-shot watches to be quite -expensive; see Watches.Notify() for more information. diff --git a/pkg/sentry/vfs/genericfstree/BUILD b/pkg/sentry/vfs/genericfstree/BUILD deleted file mode 100644 index d8fd92677..000000000 --- a/pkg/sentry/vfs/genericfstree/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -load("//tools/go_generics:defs.bzl", "go_template") - -package( - default_visibility = ["//:sandbox"], - licenses = ["notice"], -) - -go_template( - name = "generic_fstree", - srcs = [ - "genericfstree.go", - ], - types = [ - "Dentry", - ], -) diff --git a/pkg/sentry/vfs/genericfstree/genericfstree.go b/pkg/sentry/vfs/genericfstree/genericfstree.go deleted file mode 100644 index ba6e6ed49..000000000 --- a/pkg/sentry/vfs/genericfstree/genericfstree.go +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package genericfstree provides tools for implementing vfs.FilesystemImpls -// where a single statically-determined lock or set of locks is sufficient to -// ensure that a Dentry's name and parent are contextually immutable. -// -// Clients using this package must use the go_template_instance rule in -// tools/go_generics/defs.bzl to create an instantiation of this template -// package, providing types to use in place of Dentry. -package genericfstree - -import ( - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// Dentry is a required type parameter that is a struct with the given fields. -// -// +stateify savable -type Dentry struct { - // vfsd is the embedded vfs.Dentry corresponding to this vfs.DentryImpl. - vfsd vfs.Dentry - - // parent is the parent of this Dentry in the filesystem's tree. If this - // Dentry is a filesystem root, parent is nil. - parent *Dentry - - // name is the name of this Dentry in its parent. If this Dentry is a - // filesystem root, name is unspecified. - name string -} - -// IsAncestorDentry returns true if d is an ancestor of d2; that is, d is -// either d2's parent or an ancestor of d2's parent. -func IsAncestorDentry(d, d2 *Dentry) bool { - for d2 != nil { // Stop at root, where d2.parent == nil. - if d2.parent == d { - return true - } - if d2.parent == d2 { - return false - } - d2 = d2.parent - } - return false -} - -// ParentOrSelf returns d.parent. If d.parent is nil, ParentOrSelf returns d. -func ParentOrSelf(d *Dentry) *Dentry { - if d.parent != nil { - return d.parent - } - return d -} - -// PrependPath is a generic implementation of FilesystemImpl.PrependPath(). -func PrependPath(vfsroot vfs.VirtualDentry, mnt *vfs.Mount, d *Dentry, b *fspath.Builder) error { - for { - if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() { - return vfs.PrependPathAtVFSRootError{} - } - if mnt != nil && &d.vfsd == mnt.Root() { - return nil - } - if d.parent == nil { - return vfs.PrependPathAtNonMountRootError{} - } - b.PrependComponent(d.name) - d = d.parent - } -} - -// DebugPathname returns a pathname to d relative to its filesystem root. -// DebugPathname does not correspond to any Linux function; it's used to -// generate dentry pathnames for debugging. -func DebugPathname(d *Dentry) string { - var b fspath.Builder - _ = PrependPath(vfs.VirtualDentry{}, nil, d, &b) - return b.String() -} diff --git a/pkg/sentry/vfs/memxattr/BUILD b/pkg/sentry/vfs/memxattr/BUILD deleted file mode 100644 index 444ab42b9..000000000 --- a/pkg/sentry/vfs/memxattr/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "memxattr", - srcs = ["xattr.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/errors/linuxerr", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/sync", - ], -) diff --git a/pkg/sentry/vfs/memxattr/memxattr_state_autogen.go b/pkg/sentry/vfs/memxattr/memxattr_state_autogen.go new file mode 100644 index 000000000..f75223725 --- /dev/null +++ b/pkg/sentry/vfs/memxattr/memxattr_state_autogen.go @@ -0,0 +1,36 @@ +// automatically generated by stateify. + +package memxattr + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *SimpleExtendedAttributes) StateTypeName() string { + return "pkg/sentry/vfs/memxattr.SimpleExtendedAttributes" +} + +func (x *SimpleExtendedAttributes) StateFields() []string { + return []string{ + "xattrs", + } +} + +func (x *SimpleExtendedAttributes) beforeSave() {} + +// +checklocksignore +func (x *SimpleExtendedAttributes) StateSave(stateSinkObject state.Sink) { + x.beforeSave() + stateSinkObject.Save(0, &x.xattrs) +} + +func (x *SimpleExtendedAttributes) afterLoad() {} + +// +checklocksignore +func (x *SimpleExtendedAttributes) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &x.xattrs) +} + +func init() { + state.Register((*SimpleExtendedAttributes)(nil)) +} diff --git a/pkg/sentry/vfs/mount_namespace_refs.go b/pkg/sentry/vfs/mount_namespace_refs.go new file mode 100644 index 000000000..176733505 --- /dev/null +++ b/pkg/sentry/vfs/mount_namespace_refs.go @@ -0,0 +1,140 @@ +package vfs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const MountNamespaceenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var MountNamespaceobj *MountNamespace + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type MountNamespaceRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *MountNamespaceRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *MountNamespaceRefs) RefType() string { + return fmt.Sprintf("%T", MountNamespaceobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *MountNamespaceRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *MountNamespaceRefs) LogRefs() bool { + return MountNamespaceenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *MountNamespaceRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *MountNamespaceRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if MountNamespaceenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *MountNamespaceRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if MountNamespaceenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *MountNamespaceRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if MountNamespaceenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *MountNamespaceRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/sentry/vfs/mount_test.go b/pkg/sentry/vfs/mount_test.go deleted file mode 100644 index cb882a983..000000000 --- a/pkg/sentry/vfs/mount_test.go +++ /dev/null @@ -1,467 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "fmt" - "runtime" - "testing" - - "gvisor.dev/gvisor/pkg/sync" -) - -func TestMountTableLookupEmpty(t *testing.T) { - var mt mountTable - mt.Init() - - parent := &Mount{} - point := &Dentry{} - if m := mt.Lookup(parent, point); m != nil { - t.Errorf("Empty mountTable lookup: got %p, wanted nil", m) - } -} - -func TestMountTableInsertLookup(t *testing.T) { - var mt mountTable - mt.Init() - - mount := &Mount{} - mount.setKey(VirtualDentry{&Mount{}, &Dentry{}}) - mt.Insert(mount) - - if m := mt.Lookup(mount.parent(), mount.point()); m != mount { - t.Errorf("mountTable positive lookup: got %p, wanted %p", m, mount) - } - - otherParent := &Mount{} - if m := mt.Lookup(otherParent, mount.point()); m != nil { - t.Errorf("mountTable lookup with wrong mount parent: got %p, wanted nil", m) - } - otherPoint := &Dentry{} - if m := mt.Lookup(mount.parent(), otherPoint); m != nil { - t.Errorf("mountTable lookup with wrong mount point: got %p, wanted nil", m) - } -} - -// TODO(gvisor.dev/issue/1035): concurrent lookup/insertion/removal. - -// must be powers of 2 -var benchNumMounts = []int{1 << 2, 1 << 5, 1 << 8} - -// For all of the following: -// -// - BenchmarkMountTableFoo tests usage pattern "Foo" for mountTable. -// -// - BenchmarkMountMapFoo tests usage pattern "Foo" for a -// sync.RWMutex-protected map. (Mutator benchmarks do not use a RWMutex, since -// mountTable also requires external synchronization between mutators.) -// -// - BenchmarkMountSyncMapFoo tests usage pattern "Foo" for a sync.Map. -// -// ParallelLookup is by far the most common and performance-sensitive operation -// for this application. NegativeLookup is also important, but less so (only -// relevant with multiple mount namespaces and significant differences in -// mounts between them). Insertion and removal are benchmarked for -// completeness. -const enableComparativeBenchmarks = false - -func newBenchMount() *Mount { - mount := &Mount{} - mount.loadKey(VirtualDentry{&Mount{}, &Dentry{}}) - return mount -} - -func BenchmarkMountTableParallelLookup(b *testing.B) { - for numG, maxG := 1, runtime.GOMAXPROCS(0); numG >= 0 && numG <= maxG; numG *= 2 { - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%dx%d", numG, numMounts) - b.Run(desc, func(b *testing.B) { - var mt mountTable - mt.Init() - keys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - mount := newBenchMount() - mt.Insert(mount) - keys = append(keys, mount.saveKey()) - } - - var ready sync.WaitGroup - begin := make(chan struct{}) - var end sync.WaitGroup - for g := 0; g < numG; g++ { - ready.Add(1) - end.Add(1) - go func() { - defer end.Done() - ready.Done() - <-begin - for i := 0; i < b.N; i++ { - k := keys[i&(numMounts-1)] - m := mt.Lookup(k.mount, k.dentry) - if m == nil { - b.Errorf("Lookup failed") - return - } - if parent := m.parent(); parent != k.mount { - b.Errorf("Lookup returned mount with parent %p, wanted %p", parent, k.mount) - return - } - if point := m.point(); point != k.dentry { - b.Errorf("Lookup returned mount with point %p, wanted %p", point, k.dentry) - return - } - } - }() - } - - ready.Wait() - b.ResetTimer() - close(begin) - end.Wait() - }) - } - } -} - -func BenchmarkMountMapParallelLookup(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - for numG, maxG := 1, runtime.GOMAXPROCS(0); numG >= 0 && numG <= maxG; numG *= 2 { - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%dx%d", numG, numMounts) - b.Run(desc, func(b *testing.B) { - var mu sync.RWMutex - ms := make(map[VirtualDentry]*Mount) - keys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - mount := newBenchMount() - key := mount.saveKey() - ms[key] = mount - keys = append(keys, key) - } - - var ready sync.WaitGroup - begin := make(chan struct{}) - var end sync.WaitGroup - for g := 0; g < numG; g++ { - ready.Add(1) - end.Add(1) - go func() { - defer end.Done() - ready.Done() - <-begin - for i := 0; i < b.N; i++ { - k := keys[i&(numMounts-1)] - mu.RLock() - m := ms[k] - mu.RUnlock() - if m == nil { - b.Errorf("Lookup failed") - return - } - if parent := m.parent(); parent != k.mount { - b.Errorf("Lookup returned mount with parent %p, wanted %p", parent, k.mount) - return - } - if point := m.point(); point != k.dentry { - b.Errorf("Lookup returned mount with point %p, wanted %p", point, k.dentry) - return - } - } - }() - } - - ready.Wait() - b.ResetTimer() - close(begin) - end.Wait() - }) - } - } -} - -func BenchmarkMountSyncMapParallelLookup(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - for numG, maxG := 1, runtime.GOMAXPROCS(0); numG >= 0 && numG <= maxG; numG *= 2 { - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%dx%d", numG, numMounts) - b.Run(desc, func(b *testing.B) { - var ms sync.Map - keys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - mount := newBenchMount() - key := mount.getKey() - ms.Store(key, mount) - keys = append(keys, key) - } - - var ready sync.WaitGroup - begin := make(chan struct{}) - var end sync.WaitGroup - for g := 0; g < numG; g++ { - ready.Add(1) - end.Add(1) - go func() { - defer end.Done() - ready.Done() - <-begin - for i := 0; i < b.N; i++ { - k := keys[i&(numMounts-1)] - mi, ok := ms.Load(k) - if !ok { - b.Errorf("Lookup failed") - return - } - m := mi.(*Mount) - if parent := m.parent(); parent != k.mount { - b.Errorf("Lookup returned mount with parent %p, wanted %p", parent, k.mount) - return - } - if point := m.point(); point != k.dentry { - b.Errorf("Lookup returned mount with point %p, wanted %p", point, k.dentry) - return - } - } - }() - } - - ready.Wait() - b.ResetTimer() - close(begin) - end.Wait() - }) - } - } -} - -func BenchmarkMountTableNegativeLookup(b *testing.B) { - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%d", numMounts) - b.Run(desc, func(b *testing.B) { - var mt mountTable - mt.Init() - for i := 0; i < numMounts; i++ { - mt.Insert(newBenchMount()) - } - negkeys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - negkeys = append(negkeys, VirtualDentry{ - mount: &Mount{}, - dentry: &Dentry{}, - }) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - k := negkeys[i&(numMounts-1)] - m := mt.Lookup(k.mount, k.dentry) - if m != nil { - b.Fatalf("Lookup got %p, wanted nil", m) - } - } - }) - } -} - -func BenchmarkMountMapNegativeLookup(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%d", numMounts) - b.Run(desc, func(b *testing.B) { - var mu sync.RWMutex - ms := make(map[VirtualDentry]*Mount) - for i := 0; i < numMounts; i++ { - mount := newBenchMount() - ms[mount.getKey()] = mount - } - negkeys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - negkeys = append(negkeys, VirtualDentry{ - mount: &Mount{}, - dentry: &Dentry{}, - }) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - k := negkeys[i&(numMounts-1)] - mu.RLock() - m := ms[k] - mu.RUnlock() - if m != nil { - b.Fatalf("Lookup got %p, wanted nil", m) - } - } - }) - } -} - -func BenchmarkMountSyncMapNegativeLookup(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%d", numMounts) - b.Run(desc, func(b *testing.B) { - var ms sync.Map - for i := 0; i < numMounts; i++ { - mount := newBenchMount() - ms.Store(mount.saveKey(), mount) - } - negkeys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - negkeys = append(negkeys, VirtualDentry{ - mount: &Mount{}, - dentry: &Dentry{}, - }) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - k := negkeys[i&(numMounts-1)] - m, _ := ms.Load(k) - if m != nil { - b.Fatalf("Lookup got %p, wanted nil", m) - } - } - }) - } -} - -func BenchmarkMountTableInsert(b *testing.B) { - // Preallocate Mounts so that allocation time isn't included in the - // benchmark. - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - - var mt mountTable - mt.Init() - b.ResetTimer() - for i := range mounts { - mt.Insert(mounts[i]) - } -} - -func BenchmarkMountMapInsert(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - // Preallocate Mounts so that allocation time isn't included in the - // benchmark. - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - - ms := make(map[VirtualDentry]*Mount) - b.ResetTimer() - for i := range mounts { - mount := mounts[i] - ms[mount.saveKey()] = mount - } -} - -func BenchmarkMountSyncMapInsert(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - // Preallocate Mounts so that allocation time isn't included in the - // benchmark. - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - - var ms sync.Map - b.ResetTimer() - for i := range mounts { - mount := mounts[i] - ms.Store(mount.saveKey(), mount) - } -} - -func BenchmarkMountTableRemove(b *testing.B) { - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - var mt mountTable - mt.Init() - for i := range mounts { - mt.Insert(mounts[i]) - } - - b.ResetTimer() - for i := range mounts { - mt.Remove(mounts[i]) - } -} - -func BenchmarkMountMapRemove(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - ms := make(map[VirtualDentry]*Mount) - for i := range mounts { - mount := mounts[i] - ms[mount.saveKey()] = mount - } - - b.ResetTimer() - for i := range mounts { - mount := mounts[i] - delete(ms, mount.saveKey()) - } -} - -func BenchmarkMountSyncMapRemove(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - var ms sync.Map - for i := range mounts { - mount := mounts[i] - ms.Store(mount.saveKey(), mount) - } - - b.ResetTimer() - for i := range mounts { - mount := mounts[i] - ms.Delete(mount.saveKey()) - } -} diff --git a/pkg/sentry/vfs/vfs_state_autogen.go b/pkg/sentry/vfs/vfs_state_autogen.go new file mode 100644 index 000000000..364adcc0f --- /dev/null +++ b/pkg/sentry/vfs/vfs_state_autogen.go @@ -0,0 +1,2052 @@ +// automatically generated by stateify. + +package vfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (a *anonFilesystemType) StateTypeName() string { + return "pkg/sentry/vfs.anonFilesystemType" +} + +func (a *anonFilesystemType) StateFields() []string { + return []string{} +} + +func (a *anonFilesystemType) beforeSave() {} + +// +checklocksignore +func (a *anonFilesystemType) StateSave(stateSinkObject state.Sink) { + a.beforeSave() +} + +func (a *anonFilesystemType) afterLoad() {} + +// +checklocksignore +func (a *anonFilesystemType) StateLoad(stateSourceObject state.Source) { +} + +func (fs *anonFilesystem) StateTypeName() string { + return "pkg/sentry/vfs.anonFilesystem" +} + +func (fs *anonFilesystem) StateFields() []string { + return []string{ + "vfsfs", + "devMinor", + } +} + +func (fs *anonFilesystem) beforeSave() {} + +// +checklocksignore +func (fs *anonFilesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.vfsfs) + stateSinkObject.Save(1, &fs.devMinor) +} + +func (fs *anonFilesystem) afterLoad() {} + +// +checklocksignore +func (fs *anonFilesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.vfsfs) + stateSourceObject.Load(1, &fs.devMinor) +} + +func (d *anonDentry) StateTypeName() string { + return "pkg/sentry/vfs.anonDentry" +} + +func (d *anonDentry) StateFields() []string { + return []string{ + "vfsd", + "name", + } +} + +func (d *anonDentry) beforeSave() {} + +// +checklocksignore +func (d *anonDentry) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.vfsd) + stateSinkObject.Save(1, &d.name) +} + +func (d *anonDentry) afterLoad() {} + +// +checklocksignore +func (d *anonDentry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.vfsd) + stateSourceObject.Load(1, &d.name) +} + +func (d *Dentry) StateTypeName() string { + return "pkg/sentry/vfs.Dentry" +} + +func (d *Dentry) StateFields() []string { + return []string{ + "dead", + "mounts", + "impl", + } +} + +func (d *Dentry) beforeSave() {} + +// +checklocksignore +func (d *Dentry) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.dead) + stateSinkObject.Save(1, &d.mounts) + stateSinkObject.Save(2, &d.impl) +} + +func (d *Dentry) afterLoad() {} + +// +checklocksignore +func (d *Dentry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.dead) + stateSourceObject.Load(1, &d.mounts) + stateSourceObject.Load(2, &d.impl) +} + +func (kind *DeviceKind) StateTypeName() string { + return "pkg/sentry/vfs.DeviceKind" +} + +func (kind *DeviceKind) StateFields() []string { + return nil +} + +func (d *devTuple) StateTypeName() string { + return "pkg/sentry/vfs.devTuple" +} + +func (d *devTuple) StateFields() []string { + return []string{ + "kind", + "major", + "minor", + } +} + +func (d *devTuple) beforeSave() {} + +// +checklocksignore +func (d *devTuple) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.kind) + stateSinkObject.Save(1, &d.major) + stateSinkObject.Save(2, &d.minor) +} + +func (d *devTuple) afterLoad() {} + +// +checklocksignore +func (d *devTuple) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.kind) + stateSourceObject.Load(1, &d.major) + stateSourceObject.Load(2, &d.minor) +} + +func (r *registeredDevice) StateTypeName() string { + return "pkg/sentry/vfs.registeredDevice" +} + +func (r *registeredDevice) StateFields() []string { + return []string{ + "dev", + "opts", + } +} + +func (r *registeredDevice) beforeSave() {} + +// +checklocksignore +func (r *registeredDevice) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.dev) + stateSinkObject.Save(1, &r.opts) +} + +func (r *registeredDevice) afterLoad() {} + +// +checklocksignore +func (r *registeredDevice) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.dev) + stateSourceObject.Load(1, &r.opts) +} + +func (r *RegisterDeviceOptions) StateTypeName() string { + return "pkg/sentry/vfs.RegisterDeviceOptions" +} + +func (r *RegisterDeviceOptions) StateFields() []string { + return []string{ + "GroupName", + } +} + +func (r *RegisterDeviceOptions) beforeSave() {} + +// +checklocksignore +func (r *RegisterDeviceOptions) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.GroupName) +} + +func (r *RegisterDeviceOptions) afterLoad() {} + +// +checklocksignore +func (r *RegisterDeviceOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.GroupName) +} + +func (ep *EpollInstance) StateTypeName() string { + return "pkg/sentry/vfs.EpollInstance" +} + +func (ep *EpollInstance) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + "q", + "interest", + "ready", + } +} + +func (ep *EpollInstance) beforeSave() {} + +// +checklocksignore +func (ep *EpollInstance) StateSave(stateSinkObject state.Sink) { + ep.beforeSave() + stateSinkObject.Save(0, &ep.vfsfd) + stateSinkObject.Save(1, &ep.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &ep.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &ep.NoLockFD) + stateSinkObject.Save(4, &ep.q) + stateSinkObject.Save(5, &ep.interest) + stateSinkObject.Save(6, &ep.ready) +} + +func (ep *EpollInstance) afterLoad() {} + +// +checklocksignore +func (ep *EpollInstance) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ep.vfsfd) + stateSourceObject.Load(1, &ep.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &ep.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &ep.NoLockFD) + stateSourceObject.Load(4, &ep.q) + stateSourceObject.Load(5, &ep.interest) + stateSourceObject.Load(6, &ep.ready) +} + +func (e *epollInterestKey) StateTypeName() string { + return "pkg/sentry/vfs.epollInterestKey" +} + +func (e *epollInterestKey) StateFields() []string { + return []string{ + "file", + "num", + } +} + +func (e *epollInterestKey) beforeSave() {} + +// +checklocksignore +func (e *epollInterestKey) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.file) + stateSinkObject.Save(1, &e.num) +} + +func (e *epollInterestKey) afterLoad() {} + +// +checklocksignore +func (e *epollInterestKey) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.file) + stateSourceObject.Load(1, &e.num) +} + +func (epi *epollInterest) StateTypeName() string { + return "pkg/sentry/vfs.epollInterest" +} + +func (epi *epollInterest) StateFields() []string { + return []string{ + "epoll", + "key", + "waiter", + "mask", + "ready", + "epollInterestEntry", + "userData", + } +} + +func (epi *epollInterest) beforeSave() {} + +// +checklocksignore +func (epi *epollInterest) StateSave(stateSinkObject state.Sink) { + epi.beforeSave() + stateSinkObject.Save(0, &epi.epoll) + stateSinkObject.Save(1, &epi.key) + stateSinkObject.Save(2, &epi.waiter) + stateSinkObject.Save(3, &epi.mask) + stateSinkObject.Save(4, &epi.ready) + stateSinkObject.Save(5, &epi.epollInterestEntry) + stateSinkObject.Save(6, &epi.userData) +} + +// +checklocksignore +func (epi *epollInterest) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &epi.epoll) + stateSourceObject.Load(1, &epi.key) + stateSourceObject.Load(2, &epi.waiter) + stateSourceObject.Load(3, &epi.mask) + stateSourceObject.Load(4, &epi.ready) + stateSourceObject.Load(5, &epi.epollInterestEntry) + stateSourceObject.Load(6, &epi.userData) + stateSourceObject.AfterLoad(epi.afterLoad) +} + +func (l *epollInterestList) StateTypeName() string { + return "pkg/sentry/vfs.epollInterestList" +} + +func (l *epollInterestList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *epollInterestList) beforeSave() {} + +// +checklocksignore +func (l *epollInterestList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *epollInterestList) afterLoad() {} + +// +checklocksignore +func (l *epollInterestList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *epollInterestEntry) StateTypeName() string { + return "pkg/sentry/vfs.epollInterestEntry" +} + +func (e *epollInterestEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *epollInterestEntry) beforeSave() {} + +// +checklocksignore +func (e *epollInterestEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *epollInterestEntry) afterLoad() {} + +// +checklocksignore +func (e *epollInterestEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (l *eventList) StateTypeName() string { + return "pkg/sentry/vfs.eventList" +} + +func (l *eventList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *eventList) beforeSave() {} + +// +checklocksignore +func (l *eventList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *eventList) afterLoad() {} + +// +checklocksignore +func (l *eventList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *eventEntry) StateTypeName() string { + return "pkg/sentry/vfs.eventEntry" +} + +func (e *eventEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *eventEntry) beforeSave() {} + +// +checklocksignore +func (e *eventEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *eventEntry) afterLoad() {} + +// +checklocksignore +func (e *eventEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (fd *FileDescription) StateTypeName() string { + return "pkg/sentry/vfs.FileDescription" +} + +func (fd *FileDescription) StateFields() []string { + return []string{ + "FileDescriptionRefs", + "statusFlags", + "asyncHandler", + "epolls", + "vd", + "opts", + "readable", + "writable", + "usedLockBSD", + "impl", + } +} + +// +checklocksignore +func (fd *FileDescription) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.FileDescriptionRefs) + stateSinkObject.Save(1, &fd.statusFlags) + stateSinkObject.Save(2, &fd.asyncHandler) + stateSinkObject.Save(3, &fd.epolls) + stateSinkObject.Save(4, &fd.vd) + stateSinkObject.Save(5, &fd.opts) + stateSinkObject.Save(6, &fd.readable) + stateSinkObject.Save(7, &fd.writable) + stateSinkObject.Save(8, &fd.usedLockBSD) + stateSinkObject.Save(9, &fd.impl) +} + +// +checklocksignore +func (fd *FileDescription) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.FileDescriptionRefs) + stateSourceObject.Load(1, &fd.statusFlags) + stateSourceObject.Load(2, &fd.asyncHandler) + stateSourceObject.Load(3, &fd.epolls) + stateSourceObject.Load(4, &fd.vd) + stateSourceObject.Load(5, &fd.opts) + stateSourceObject.Load(6, &fd.readable) + stateSourceObject.Load(7, &fd.writable) + stateSourceObject.Load(8, &fd.usedLockBSD) + stateSourceObject.Load(9, &fd.impl) + stateSourceObject.AfterLoad(fd.afterLoad) +} + +func (f *FileDescriptionOptions) StateTypeName() string { + return "pkg/sentry/vfs.FileDescriptionOptions" +} + +func (f *FileDescriptionOptions) StateFields() []string { + return []string{ + "AllowDirectIO", + "DenyPRead", + "DenyPWrite", + "UseDentryMetadata", + } +} + +func (f *FileDescriptionOptions) beforeSave() {} + +// +checklocksignore +func (f *FileDescriptionOptions) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.AllowDirectIO) + stateSinkObject.Save(1, &f.DenyPRead) + stateSinkObject.Save(2, &f.DenyPWrite) + stateSinkObject.Save(3, &f.UseDentryMetadata) +} + +func (f *FileDescriptionOptions) afterLoad() {} + +// +checklocksignore +func (f *FileDescriptionOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.AllowDirectIO) + stateSourceObject.Load(1, &f.DenyPRead) + stateSourceObject.Load(2, &f.DenyPWrite) + stateSourceObject.Load(3, &f.UseDentryMetadata) +} + +func (d *Dirent) StateTypeName() string { + return "pkg/sentry/vfs.Dirent" +} + +func (d *Dirent) StateFields() []string { + return []string{ + "Name", + "Type", + "Ino", + "NextOff", + } +} + +func (d *Dirent) beforeSave() {} + +// +checklocksignore +func (d *Dirent) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.Name) + stateSinkObject.Save(1, &d.Type) + stateSinkObject.Save(2, &d.Ino) + stateSinkObject.Save(3, &d.NextOff) +} + +func (d *Dirent) afterLoad() {} + +// +checklocksignore +func (d *Dirent) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.Name) + stateSourceObject.Load(1, &d.Type) + stateSourceObject.Load(2, &d.Ino) + stateSourceObject.Load(3, &d.NextOff) +} + +func (f *FileDescriptionDefaultImpl) StateTypeName() string { + return "pkg/sentry/vfs.FileDescriptionDefaultImpl" +} + +func (f *FileDescriptionDefaultImpl) StateFields() []string { + return []string{} +} + +func (f *FileDescriptionDefaultImpl) beforeSave() {} + +// +checklocksignore +func (f *FileDescriptionDefaultImpl) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *FileDescriptionDefaultImpl) afterLoad() {} + +// +checklocksignore +func (f *FileDescriptionDefaultImpl) StateLoad(stateSourceObject state.Source) { +} + +func (d *DirectoryFileDescriptionDefaultImpl) StateTypeName() string { + return "pkg/sentry/vfs.DirectoryFileDescriptionDefaultImpl" +} + +func (d *DirectoryFileDescriptionDefaultImpl) StateFields() []string { + return []string{} +} + +func (d *DirectoryFileDescriptionDefaultImpl) beforeSave() {} + +// +checklocksignore +func (d *DirectoryFileDescriptionDefaultImpl) StateSave(stateSinkObject state.Sink) { + d.beforeSave() +} + +func (d *DirectoryFileDescriptionDefaultImpl) afterLoad() {} + +// +checklocksignore +func (d *DirectoryFileDescriptionDefaultImpl) StateLoad(stateSourceObject state.Source) { +} + +func (d *DentryMetadataFileDescriptionImpl) StateTypeName() string { + return "pkg/sentry/vfs.DentryMetadataFileDescriptionImpl" +} + +func (d *DentryMetadataFileDescriptionImpl) StateFields() []string { + return []string{} +} + +func (d *DentryMetadataFileDescriptionImpl) beforeSave() {} + +// +checklocksignore +func (d *DentryMetadataFileDescriptionImpl) StateSave(stateSinkObject state.Sink) { + d.beforeSave() +} + +func (d *DentryMetadataFileDescriptionImpl) afterLoad() {} + +// +checklocksignore +func (d *DentryMetadataFileDescriptionImpl) StateLoad(stateSourceObject state.Source) { +} + +func (s *StaticData) StateTypeName() string { + return "pkg/sentry/vfs.StaticData" +} + +func (s *StaticData) StateFields() []string { + return []string{ + "Data", + } +} + +func (s *StaticData) beforeSave() {} + +// +checklocksignore +func (s *StaticData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Data) +} + +func (s *StaticData) afterLoad() {} + +// +checklocksignore +func (s *StaticData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Data) +} + +func (fd *DynamicBytesFileDescriptionImpl) StateTypeName() string { + return "pkg/sentry/vfs.DynamicBytesFileDescriptionImpl" +} + +func (fd *DynamicBytesFileDescriptionImpl) StateFields() []string { + return []string{ + "data", + "buf", + "off", + "lastRead", + } +} + +func (fd *DynamicBytesFileDescriptionImpl) beforeSave() {} + +// +checklocksignore +func (fd *DynamicBytesFileDescriptionImpl) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + var bufValue []byte = fd.saveBuf() + stateSinkObject.SaveValue(1, bufValue) + stateSinkObject.Save(0, &fd.data) + stateSinkObject.Save(2, &fd.off) + stateSinkObject.Save(3, &fd.lastRead) +} + +func (fd *DynamicBytesFileDescriptionImpl) afterLoad() {} + +// +checklocksignore +func (fd *DynamicBytesFileDescriptionImpl) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.data) + stateSourceObject.Load(2, &fd.off) + stateSourceObject.Load(3, &fd.lastRead) + stateSourceObject.LoadValue(1, new([]byte), func(y interface{}) { fd.loadBuf(y.([]byte)) }) +} + +func (fd *LockFD) StateTypeName() string { + return "pkg/sentry/vfs.LockFD" +} + +func (fd *LockFD) StateFields() []string { + return []string{ + "locks", + } +} + +func (fd *LockFD) beforeSave() {} + +// +checklocksignore +func (fd *LockFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.locks) +} + +func (fd *LockFD) afterLoad() {} + +// +checklocksignore +func (fd *LockFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.locks) +} + +func (n *NoLockFD) StateTypeName() string { + return "pkg/sentry/vfs.NoLockFD" +} + +func (n *NoLockFD) StateFields() []string { + return []string{} +} + +func (n *NoLockFD) beforeSave() {} + +// +checklocksignore +func (n *NoLockFD) StateSave(stateSinkObject state.Sink) { + n.beforeSave() +} + +func (n *NoLockFD) afterLoad() {} + +// +checklocksignore +func (n *NoLockFD) StateLoad(stateSourceObject state.Source) { +} + +func (b *BadLockFD) StateTypeName() string { + return "pkg/sentry/vfs.BadLockFD" +} + +func (b *BadLockFD) StateFields() []string { + return []string{} +} + +func (b *BadLockFD) beforeSave() {} + +// +checklocksignore +func (b *BadLockFD) StateSave(stateSinkObject state.Sink) { + b.beforeSave() +} + +func (b *BadLockFD) afterLoad() {} + +// +checklocksignore +func (b *BadLockFD) StateLoad(stateSourceObject state.Source) { +} + +func (r *FileDescriptionRefs) StateTypeName() string { + return "pkg/sentry/vfs.FileDescriptionRefs" +} + +func (r *FileDescriptionRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *FileDescriptionRefs) beforeSave() {} + +// +checklocksignore +func (r *FileDescriptionRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *FileDescriptionRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (fs *Filesystem) StateTypeName() string { + return "pkg/sentry/vfs.Filesystem" +} + +func (fs *Filesystem) StateFields() []string { + return []string{ + "FilesystemRefs", + "vfs", + "fsType", + "impl", + } +} + +func (fs *Filesystem) beforeSave() {} + +// +checklocksignore +func (fs *Filesystem) StateSave(stateSinkObject state.Sink) { + fs.beforeSave() + stateSinkObject.Save(0, &fs.FilesystemRefs) + stateSinkObject.Save(1, &fs.vfs) + stateSinkObject.Save(2, &fs.fsType) + stateSinkObject.Save(3, &fs.impl) +} + +func (fs *Filesystem) afterLoad() {} + +// +checklocksignore +func (fs *Filesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fs.FilesystemRefs) + stateSourceObject.Load(1, &fs.vfs) + stateSourceObject.Load(2, &fs.fsType) + stateSourceObject.Load(3, &fs.impl) +} + +func (p *PrependPathAtVFSRootError) StateTypeName() string { + return "pkg/sentry/vfs.PrependPathAtVFSRootError" +} + +func (p *PrependPathAtVFSRootError) StateFields() []string { + return []string{} +} + +func (p *PrependPathAtVFSRootError) beforeSave() {} + +// +checklocksignore +func (p *PrependPathAtVFSRootError) StateSave(stateSinkObject state.Sink) { + p.beforeSave() +} + +func (p *PrependPathAtVFSRootError) afterLoad() {} + +// +checklocksignore +func (p *PrependPathAtVFSRootError) StateLoad(stateSourceObject state.Source) { +} + +func (p *PrependPathAtNonMountRootError) StateTypeName() string { + return "pkg/sentry/vfs.PrependPathAtNonMountRootError" +} + +func (p *PrependPathAtNonMountRootError) StateFields() []string { + return []string{} +} + +func (p *PrependPathAtNonMountRootError) beforeSave() {} + +// +checklocksignore +func (p *PrependPathAtNonMountRootError) StateSave(stateSinkObject state.Sink) { + p.beforeSave() +} + +func (p *PrependPathAtNonMountRootError) afterLoad() {} + +// +checklocksignore +func (p *PrependPathAtNonMountRootError) StateLoad(stateSourceObject state.Source) { +} + +func (p *PrependPathSyntheticError) StateTypeName() string { + return "pkg/sentry/vfs.PrependPathSyntheticError" +} + +func (p *PrependPathSyntheticError) StateFields() []string { + return []string{} +} + +func (p *PrependPathSyntheticError) beforeSave() {} + +// +checklocksignore +func (p *PrependPathSyntheticError) StateSave(stateSinkObject state.Sink) { + p.beforeSave() +} + +func (p *PrependPathSyntheticError) afterLoad() {} + +// +checklocksignore +func (p *PrependPathSyntheticError) StateLoad(stateSourceObject state.Source) { +} + +func (r *FilesystemRefs) StateTypeName() string { + return "pkg/sentry/vfs.FilesystemRefs" +} + +func (r *FilesystemRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *FilesystemRefs) beforeSave() {} + +// +checklocksignore +func (r *FilesystemRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *FilesystemRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (r *registeredFilesystemType) StateTypeName() string { + return "pkg/sentry/vfs.registeredFilesystemType" +} + +func (r *registeredFilesystemType) StateFields() []string { + return []string{ + "fsType", + "opts", + } +} + +func (r *registeredFilesystemType) beforeSave() {} + +// +checklocksignore +func (r *registeredFilesystemType) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.fsType) + stateSinkObject.Save(1, &r.opts) +} + +func (r *registeredFilesystemType) afterLoad() {} + +// +checklocksignore +func (r *registeredFilesystemType) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.fsType) + stateSourceObject.Load(1, &r.opts) +} + +func (r *RegisterFilesystemTypeOptions) StateTypeName() string { + return "pkg/sentry/vfs.RegisterFilesystemTypeOptions" +} + +func (r *RegisterFilesystemTypeOptions) StateFields() []string { + return []string{ + "AllowUserMount", + "AllowUserList", + "RequiresDevice", + } +} + +func (r *RegisterFilesystemTypeOptions) beforeSave() {} + +// +checklocksignore +func (r *RegisterFilesystemTypeOptions) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.AllowUserMount) + stateSinkObject.Save(1, &r.AllowUserList) + stateSinkObject.Save(2, &r.RequiresDevice) +} + +func (r *RegisterFilesystemTypeOptions) afterLoad() {} + +// +checklocksignore +func (r *RegisterFilesystemTypeOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.AllowUserMount) + stateSourceObject.Load(1, &r.AllowUserList) + stateSourceObject.Load(2, &r.RequiresDevice) +} + +func (e *EventType) StateTypeName() string { + return "pkg/sentry/vfs.EventType" +} + +func (e *EventType) StateFields() []string { + return nil +} + +func (i *Inotify) StateTypeName() string { + return "pkg/sentry/vfs.Inotify" +} + +func (i *Inotify) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "DentryMetadataFileDescriptionImpl", + "NoLockFD", + "id", + "queue", + "events", + "scratch", + "nextWatchMinusOne", + "watches", + } +} + +func (i *Inotify) beforeSave() {} + +// +checklocksignore +func (i *Inotify) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.vfsfd) + stateSinkObject.Save(1, &i.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &i.DentryMetadataFileDescriptionImpl) + stateSinkObject.Save(3, &i.NoLockFD) + stateSinkObject.Save(4, &i.id) + stateSinkObject.Save(5, &i.queue) + stateSinkObject.Save(6, &i.events) + stateSinkObject.Save(7, &i.scratch) + stateSinkObject.Save(8, &i.nextWatchMinusOne) + stateSinkObject.Save(9, &i.watches) +} + +func (i *Inotify) afterLoad() {} + +// +checklocksignore +func (i *Inotify) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.vfsfd) + stateSourceObject.Load(1, &i.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &i.DentryMetadataFileDescriptionImpl) + stateSourceObject.Load(3, &i.NoLockFD) + stateSourceObject.Load(4, &i.id) + stateSourceObject.Load(5, &i.queue) + stateSourceObject.Load(6, &i.events) + stateSourceObject.Load(7, &i.scratch) + stateSourceObject.Load(8, &i.nextWatchMinusOne) + stateSourceObject.Load(9, &i.watches) +} + +func (w *Watches) StateTypeName() string { + return "pkg/sentry/vfs.Watches" +} + +func (w *Watches) StateFields() []string { + return []string{ + "ws", + } +} + +func (w *Watches) beforeSave() {} + +// +checklocksignore +func (w *Watches) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.ws) +} + +func (w *Watches) afterLoad() {} + +// +checklocksignore +func (w *Watches) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.ws) +} + +func (w *Watch) StateTypeName() string { + return "pkg/sentry/vfs.Watch" +} + +func (w *Watch) StateFields() []string { + return []string{ + "owner", + "wd", + "target", + "mask", + "expired", + } +} + +func (w *Watch) beforeSave() {} + +// +checklocksignore +func (w *Watch) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.owner) + stateSinkObject.Save(1, &w.wd) + stateSinkObject.Save(2, &w.target) + stateSinkObject.Save(3, &w.mask) + stateSinkObject.Save(4, &w.expired) +} + +func (w *Watch) afterLoad() {} + +// +checklocksignore +func (w *Watch) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.owner) + stateSourceObject.Load(1, &w.wd) + stateSourceObject.Load(2, &w.target) + stateSourceObject.Load(3, &w.mask) + stateSourceObject.Load(4, &w.expired) +} + +func (e *Event) StateTypeName() string { + return "pkg/sentry/vfs.Event" +} + +func (e *Event) StateFields() []string { + return []string{ + "eventEntry", + "wd", + "mask", + "cookie", + "len", + "name", + } +} + +func (e *Event) beforeSave() {} + +// +checklocksignore +func (e *Event) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.eventEntry) + stateSinkObject.Save(1, &e.wd) + stateSinkObject.Save(2, &e.mask) + stateSinkObject.Save(3, &e.cookie) + stateSinkObject.Save(4, &e.len) + stateSinkObject.Save(5, &e.name) +} + +func (e *Event) afterLoad() {} + +// +checklocksignore +func (e *Event) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.eventEntry) + stateSourceObject.Load(1, &e.wd) + stateSourceObject.Load(2, &e.mask) + stateSourceObject.Load(3, &e.cookie) + stateSourceObject.Load(4, &e.len) + stateSourceObject.Load(5, &e.name) +} + +func (fl *FileLocks) StateTypeName() string { + return "pkg/sentry/vfs.FileLocks" +} + +func (fl *FileLocks) StateFields() []string { + return []string{ + "bsd", + "posix", + } +} + +func (fl *FileLocks) beforeSave() {} + +// +checklocksignore +func (fl *FileLocks) StateSave(stateSinkObject state.Sink) { + fl.beforeSave() + stateSinkObject.Save(0, &fl.bsd) + stateSinkObject.Save(1, &fl.posix) +} + +func (fl *FileLocks) afterLoad() {} + +// +checklocksignore +func (fl *FileLocks) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fl.bsd) + stateSourceObject.Load(1, &fl.posix) +} + +func (mnt *Mount) StateTypeName() string { + return "pkg/sentry/vfs.Mount" +} + +func (mnt *Mount) StateFields() []string { + return []string{ + "vfs", + "fs", + "root", + "ID", + "Flags", + "key", + "ns", + "refs", + "children", + "umounted", + "writers", + } +} + +func (mnt *Mount) beforeSave() {} + +// +checklocksignore +func (mnt *Mount) StateSave(stateSinkObject state.Sink) { + mnt.beforeSave() + var keyValue VirtualDentry = mnt.saveKey() + stateSinkObject.SaveValue(5, keyValue) + stateSinkObject.Save(0, &mnt.vfs) + stateSinkObject.Save(1, &mnt.fs) + stateSinkObject.Save(2, &mnt.root) + stateSinkObject.Save(3, &mnt.ID) + stateSinkObject.Save(4, &mnt.Flags) + stateSinkObject.Save(6, &mnt.ns) + stateSinkObject.Save(7, &mnt.refs) + stateSinkObject.Save(8, &mnt.children) + stateSinkObject.Save(9, &mnt.umounted) + stateSinkObject.Save(10, &mnt.writers) +} + +// +checklocksignore +func (mnt *Mount) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mnt.vfs) + stateSourceObject.Load(1, &mnt.fs) + stateSourceObject.Load(2, &mnt.root) + stateSourceObject.Load(3, &mnt.ID) + stateSourceObject.Load(4, &mnt.Flags) + stateSourceObject.Load(6, &mnt.ns) + stateSourceObject.Load(7, &mnt.refs) + stateSourceObject.Load(8, &mnt.children) + stateSourceObject.Load(9, &mnt.umounted) + stateSourceObject.Load(10, &mnt.writers) + stateSourceObject.LoadValue(5, new(VirtualDentry), func(y interface{}) { mnt.loadKey(y.(VirtualDentry)) }) + stateSourceObject.AfterLoad(mnt.afterLoad) +} + +func (mntns *MountNamespace) StateTypeName() string { + return "pkg/sentry/vfs.MountNamespace" +} + +func (mntns *MountNamespace) StateFields() []string { + return []string{ + "MountNamespaceRefs", + "Owner", + "root", + "mountpoints", + } +} + +func (mntns *MountNamespace) beforeSave() {} + +// +checklocksignore +func (mntns *MountNamespace) StateSave(stateSinkObject state.Sink) { + mntns.beforeSave() + stateSinkObject.Save(0, &mntns.MountNamespaceRefs) + stateSinkObject.Save(1, &mntns.Owner) + stateSinkObject.Save(2, &mntns.root) + stateSinkObject.Save(3, &mntns.mountpoints) +} + +func (mntns *MountNamespace) afterLoad() {} + +// +checklocksignore +func (mntns *MountNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mntns.MountNamespaceRefs) + stateSourceObject.Load(1, &mntns.Owner) + stateSourceObject.Load(2, &mntns.root) + stateSourceObject.Load(3, &mntns.mountpoints) +} + +func (u *umountRecursiveOptions) StateTypeName() string { + return "pkg/sentry/vfs.umountRecursiveOptions" +} + +func (u *umountRecursiveOptions) StateFields() []string { + return []string{ + "eager", + "disconnectHierarchy", + } +} + +func (u *umountRecursiveOptions) beforeSave() {} + +// +checklocksignore +func (u *umountRecursiveOptions) StateSave(stateSinkObject state.Sink) { + u.beforeSave() + stateSinkObject.Save(0, &u.eager) + stateSinkObject.Save(1, &u.disconnectHierarchy) +} + +func (u *umountRecursiveOptions) afterLoad() {} + +// +checklocksignore +func (u *umountRecursiveOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &u.eager) + stateSourceObject.Load(1, &u.disconnectHierarchy) +} + +func (r *MountNamespaceRefs) StateTypeName() string { + return "pkg/sentry/vfs.MountNamespaceRefs" +} + +func (r *MountNamespaceRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *MountNamespaceRefs) beforeSave() {} + +// +checklocksignore +func (r *MountNamespaceRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *MountNamespaceRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (fd *opathFD) StateTypeName() string { + return "pkg/sentry/vfs.opathFD" +} + +func (fd *opathFD) StateFields() []string { + return []string{ + "vfsfd", + "FileDescriptionDefaultImpl", + "BadLockFD", + } +} + +func (fd *opathFD) beforeSave() {} + +// +checklocksignore +func (fd *opathFD) StateSave(stateSinkObject state.Sink) { + fd.beforeSave() + stateSinkObject.Save(0, &fd.vfsfd) + stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl) + stateSinkObject.Save(2, &fd.BadLockFD) +} + +func (fd *opathFD) afterLoad() {} + +// +checklocksignore +func (fd *opathFD) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fd.vfsfd) + stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl) + stateSourceObject.Load(2, &fd.BadLockFD) +} + +func (g *GetDentryOptions) StateTypeName() string { + return "pkg/sentry/vfs.GetDentryOptions" +} + +func (g *GetDentryOptions) StateFields() []string { + return []string{ + "CheckSearchable", + } +} + +func (g *GetDentryOptions) beforeSave() {} + +// +checklocksignore +func (g *GetDentryOptions) StateSave(stateSinkObject state.Sink) { + g.beforeSave() + stateSinkObject.Save(0, &g.CheckSearchable) +} + +func (g *GetDentryOptions) afterLoad() {} + +// +checklocksignore +func (g *GetDentryOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &g.CheckSearchable) +} + +func (m *MkdirOptions) StateTypeName() string { + return "pkg/sentry/vfs.MkdirOptions" +} + +func (m *MkdirOptions) StateFields() []string { + return []string{ + "Mode", + "ForSyntheticMountpoint", + } +} + +func (m *MkdirOptions) beforeSave() {} + +// +checklocksignore +func (m *MkdirOptions) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.Mode) + stateSinkObject.Save(1, &m.ForSyntheticMountpoint) +} + +func (m *MkdirOptions) afterLoad() {} + +// +checklocksignore +func (m *MkdirOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.Mode) + stateSourceObject.Load(1, &m.ForSyntheticMountpoint) +} + +func (m *MknodOptions) StateTypeName() string { + return "pkg/sentry/vfs.MknodOptions" +} + +func (m *MknodOptions) StateFields() []string { + return []string{ + "Mode", + "DevMajor", + "DevMinor", + "Endpoint", + } +} + +func (m *MknodOptions) beforeSave() {} + +// +checklocksignore +func (m *MknodOptions) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.Mode) + stateSinkObject.Save(1, &m.DevMajor) + stateSinkObject.Save(2, &m.DevMinor) + stateSinkObject.Save(3, &m.Endpoint) +} + +func (m *MknodOptions) afterLoad() {} + +// +checklocksignore +func (m *MknodOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.Mode) + stateSourceObject.Load(1, &m.DevMajor) + stateSourceObject.Load(2, &m.DevMinor) + stateSourceObject.Load(3, &m.Endpoint) +} + +func (m *MountFlags) StateTypeName() string { + return "pkg/sentry/vfs.MountFlags" +} + +func (m *MountFlags) StateFields() []string { + return []string{ + "NoExec", + "NoATime", + "NoDev", + "NoSUID", + } +} + +func (m *MountFlags) beforeSave() {} + +// +checklocksignore +func (m *MountFlags) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.NoExec) + stateSinkObject.Save(1, &m.NoATime) + stateSinkObject.Save(2, &m.NoDev) + stateSinkObject.Save(3, &m.NoSUID) +} + +func (m *MountFlags) afterLoad() {} + +// +checklocksignore +func (m *MountFlags) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.NoExec) + stateSourceObject.Load(1, &m.NoATime) + stateSourceObject.Load(2, &m.NoDev) + stateSourceObject.Load(3, &m.NoSUID) +} + +func (m *MountOptions) StateTypeName() string { + return "pkg/sentry/vfs.MountOptions" +} + +func (m *MountOptions) StateFields() []string { + return []string{ + "Flags", + "ReadOnly", + "GetFilesystemOptions", + "InternalMount", + } +} + +func (m *MountOptions) beforeSave() {} + +// +checklocksignore +func (m *MountOptions) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.Flags) + stateSinkObject.Save(1, &m.ReadOnly) + stateSinkObject.Save(2, &m.GetFilesystemOptions) + stateSinkObject.Save(3, &m.InternalMount) +} + +func (m *MountOptions) afterLoad() {} + +// +checklocksignore +func (m *MountOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.Flags) + stateSourceObject.Load(1, &m.ReadOnly) + stateSourceObject.Load(2, &m.GetFilesystemOptions) + stateSourceObject.Load(3, &m.InternalMount) +} + +func (o *OpenOptions) StateTypeName() string { + return "pkg/sentry/vfs.OpenOptions" +} + +func (o *OpenOptions) StateFields() []string { + return []string{ + "Flags", + "Mode", + "FileExec", + } +} + +func (o *OpenOptions) beforeSave() {} + +// +checklocksignore +func (o *OpenOptions) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.Flags) + stateSinkObject.Save(1, &o.Mode) + stateSinkObject.Save(2, &o.FileExec) +} + +func (o *OpenOptions) afterLoad() {} + +// +checklocksignore +func (o *OpenOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.Flags) + stateSourceObject.Load(1, &o.Mode) + stateSourceObject.Load(2, &o.FileExec) +} + +func (r *ReadOptions) StateTypeName() string { + return "pkg/sentry/vfs.ReadOptions" +} + +func (r *ReadOptions) StateFields() []string { + return []string{ + "Flags", + } +} + +func (r *ReadOptions) beforeSave() {} + +// +checklocksignore +func (r *ReadOptions) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.Flags) +} + +func (r *ReadOptions) afterLoad() {} + +// +checklocksignore +func (r *ReadOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.Flags) +} + +func (r *RenameOptions) StateTypeName() string { + return "pkg/sentry/vfs.RenameOptions" +} + +func (r *RenameOptions) StateFields() []string { + return []string{ + "Flags", + "MustBeDir", + } +} + +func (r *RenameOptions) beforeSave() {} + +// +checklocksignore +func (r *RenameOptions) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.Flags) + stateSinkObject.Save(1, &r.MustBeDir) +} + +func (r *RenameOptions) afterLoad() {} + +// +checklocksignore +func (r *RenameOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.Flags) + stateSourceObject.Load(1, &r.MustBeDir) +} + +func (s *SetStatOptions) StateTypeName() string { + return "pkg/sentry/vfs.SetStatOptions" +} + +func (s *SetStatOptions) StateFields() []string { + return []string{ + "Stat", + "NeedWritePerm", + } +} + +func (s *SetStatOptions) beforeSave() {} + +// +checklocksignore +func (s *SetStatOptions) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Stat) + stateSinkObject.Save(1, &s.NeedWritePerm) +} + +func (s *SetStatOptions) afterLoad() {} + +// +checklocksignore +func (s *SetStatOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Stat) + stateSourceObject.Load(1, &s.NeedWritePerm) +} + +func (b *BoundEndpointOptions) StateTypeName() string { + return "pkg/sentry/vfs.BoundEndpointOptions" +} + +func (b *BoundEndpointOptions) StateFields() []string { + return []string{ + "Addr", + } +} + +func (b *BoundEndpointOptions) beforeSave() {} + +// +checklocksignore +func (b *BoundEndpointOptions) StateSave(stateSinkObject state.Sink) { + b.beforeSave() + stateSinkObject.Save(0, &b.Addr) +} + +func (b *BoundEndpointOptions) afterLoad() {} + +// +checklocksignore +func (b *BoundEndpointOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &b.Addr) +} + +func (g *GetXattrOptions) StateTypeName() string { + return "pkg/sentry/vfs.GetXattrOptions" +} + +func (g *GetXattrOptions) StateFields() []string { + return []string{ + "Name", + "Size", + } +} + +func (g *GetXattrOptions) beforeSave() {} + +// +checklocksignore +func (g *GetXattrOptions) StateSave(stateSinkObject state.Sink) { + g.beforeSave() + stateSinkObject.Save(0, &g.Name) + stateSinkObject.Save(1, &g.Size) +} + +func (g *GetXattrOptions) afterLoad() {} + +// +checklocksignore +func (g *GetXattrOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &g.Name) + stateSourceObject.Load(1, &g.Size) +} + +func (s *SetXattrOptions) StateTypeName() string { + return "pkg/sentry/vfs.SetXattrOptions" +} + +func (s *SetXattrOptions) StateFields() []string { + return []string{ + "Name", + "Value", + "Flags", + } +} + +func (s *SetXattrOptions) beforeSave() {} + +// +checklocksignore +func (s *SetXattrOptions) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Name) + stateSinkObject.Save(1, &s.Value) + stateSinkObject.Save(2, &s.Flags) +} + +func (s *SetXattrOptions) afterLoad() {} + +// +checklocksignore +func (s *SetXattrOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Name) + stateSourceObject.Load(1, &s.Value) + stateSourceObject.Load(2, &s.Flags) +} + +func (s *StatOptions) StateTypeName() string { + return "pkg/sentry/vfs.StatOptions" +} + +func (s *StatOptions) StateFields() []string { + return []string{ + "Mask", + "Sync", + } +} + +func (s *StatOptions) beforeSave() {} + +// +checklocksignore +func (s *StatOptions) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Mask) + stateSinkObject.Save(1, &s.Sync) +} + +func (s *StatOptions) afterLoad() {} + +// +checklocksignore +func (s *StatOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Mask) + stateSourceObject.Load(1, &s.Sync) +} + +func (u *UmountOptions) StateTypeName() string { + return "pkg/sentry/vfs.UmountOptions" +} + +func (u *UmountOptions) StateFields() []string { + return []string{ + "Flags", + } +} + +func (u *UmountOptions) beforeSave() {} + +// +checklocksignore +func (u *UmountOptions) StateSave(stateSinkObject state.Sink) { + u.beforeSave() + stateSinkObject.Save(0, &u.Flags) +} + +func (u *UmountOptions) afterLoad() {} + +// +checklocksignore +func (u *UmountOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &u.Flags) +} + +func (w *WriteOptions) StateTypeName() string { + return "pkg/sentry/vfs.WriteOptions" +} + +func (w *WriteOptions) StateFields() []string { + return []string{ + "Flags", + } +} + +func (w *WriteOptions) beforeSave() {} + +// +checklocksignore +func (w *WriteOptions) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.Flags) +} + +func (w *WriteOptions) afterLoad() {} + +// +checklocksignore +func (w *WriteOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.Flags) +} + +func (a *AccessTypes) StateTypeName() string { + return "pkg/sentry/vfs.AccessTypes" +} + +func (a *AccessTypes) StateFields() []string { + return nil +} + +func (rp *ResolvingPath) StateTypeName() string { + return "pkg/sentry/vfs.ResolvingPath" +} + +func (rp *ResolvingPath) StateFields() []string { + return []string{ + "vfs", + "root", + "mount", + "start", + "pit", + "flags", + "mustBeDir", + "symlinks", + "curPart", + "creds", + "nextMount", + "nextStart", + "absSymlinkTarget", + "parts", + } +} + +func (rp *ResolvingPath) beforeSave() {} + +// +checklocksignore +func (rp *ResolvingPath) StateSave(stateSinkObject state.Sink) { + rp.beforeSave() + stateSinkObject.Save(0, &rp.vfs) + stateSinkObject.Save(1, &rp.root) + stateSinkObject.Save(2, &rp.mount) + stateSinkObject.Save(3, &rp.start) + stateSinkObject.Save(4, &rp.pit) + stateSinkObject.Save(5, &rp.flags) + stateSinkObject.Save(6, &rp.mustBeDir) + stateSinkObject.Save(7, &rp.symlinks) + stateSinkObject.Save(8, &rp.curPart) + stateSinkObject.Save(9, &rp.creds) + stateSinkObject.Save(10, &rp.nextMount) + stateSinkObject.Save(11, &rp.nextStart) + stateSinkObject.Save(12, &rp.absSymlinkTarget) + stateSinkObject.Save(13, &rp.parts) +} + +func (rp *ResolvingPath) afterLoad() {} + +// +checklocksignore +func (rp *ResolvingPath) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &rp.vfs) + stateSourceObject.Load(1, &rp.root) + stateSourceObject.Load(2, &rp.mount) + stateSourceObject.Load(3, &rp.start) + stateSourceObject.Load(4, &rp.pit) + stateSourceObject.Load(5, &rp.flags) + stateSourceObject.Load(6, &rp.mustBeDir) + stateSourceObject.Load(7, &rp.symlinks) + stateSourceObject.Load(8, &rp.curPart) + stateSourceObject.Load(9, &rp.creds) + stateSourceObject.Load(10, &rp.nextMount) + stateSourceObject.Load(11, &rp.nextStart) + stateSourceObject.Load(12, &rp.absSymlinkTarget) + stateSourceObject.Load(13, &rp.parts) +} + +func (r *resolveMountRootOrJumpError) StateTypeName() string { + return "pkg/sentry/vfs.resolveMountRootOrJumpError" +} + +func (r *resolveMountRootOrJumpError) StateFields() []string { + return []string{} +} + +func (r *resolveMountRootOrJumpError) beforeSave() {} + +// +checklocksignore +func (r *resolveMountRootOrJumpError) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *resolveMountRootOrJumpError) afterLoad() {} + +// +checklocksignore +func (r *resolveMountRootOrJumpError) StateLoad(stateSourceObject state.Source) { +} + +func (r *resolveMountPointError) StateTypeName() string { + return "pkg/sentry/vfs.resolveMountPointError" +} + +func (r *resolveMountPointError) StateFields() []string { + return []string{} +} + +func (r *resolveMountPointError) beforeSave() {} + +// +checklocksignore +func (r *resolveMountPointError) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *resolveMountPointError) afterLoad() {} + +// +checklocksignore +func (r *resolveMountPointError) StateLoad(stateSourceObject state.Source) { +} + +func (r *resolveAbsSymlinkError) StateTypeName() string { + return "pkg/sentry/vfs.resolveAbsSymlinkError" +} + +func (r *resolveAbsSymlinkError) StateFields() []string { + return []string{} +} + +func (r *resolveAbsSymlinkError) beforeSave() {} + +// +checklocksignore +func (r *resolveAbsSymlinkError) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *resolveAbsSymlinkError) afterLoad() {} + +// +checklocksignore +func (r *resolveAbsSymlinkError) StateLoad(stateSourceObject state.Source) { +} + +func (vfs *VirtualFilesystem) StateTypeName() string { + return "pkg/sentry/vfs.VirtualFilesystem" +} + +func (vfs *VirtualFilesystem) StateFields() []string { + return []string{ + "mounts", + "mountpoints", + "lastMountID", + "anonMount", + "devices", + "anonBlockDevMinorNext", + "anonBlockDevMinor", + "fsTypes", + "filesystems", + } +} + +func (vfs *VirtualFilesystem) beforeSave() {} + +// +checklocksignore +func (vfs *VirtualFilesystem) StateSave(stateSinkObject state.Sink) { + vfs.beforeSave() + var mountsValue []*Mount = vfs.saveMounts() + stateSinkObject.SaveValue(0, mountsValue) + stateSinkObject.Save(1, &vfs.mountpoints) + stateSinkObject.Save(2, &vfs.lastMountID) + stateSinkObject.Save(3, &vfs.anonMount) + stateSinkObject.Save(4, &vfs.devices) + stateSinkObject.Save(5, &vfs.anonBlockDevMinorNext) + stateSinkObject.Save(6, &vfs.anonBlockDevMinor) + stateSinkObject.Save(7, &vfs.fsTypes) + stateSinkObject.Save(8, &vfs.filesystems) +} + +func (vfs *VirtualFilesystem) afterLoad() {} + +// +checklocksignore +func (vfs *VirtualFilesystem) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(1, &vfs.mountpoints) + stateSourceObject.Load(2, &vfs.lastMountID) + stateSourceObject.Load(3, &vfs.anonMount) + stateSourceObject.Load(4, &vfs.devices) + stateSourceObject.Load(5, &vfs.anonBlockDevMinorNext) + stateSourceObject.Load(6, &vfs.anonBlockDevMinor) + stateSourceObject.Load(7, &vfs.fsTypes) + stateSourceObject.Load(8, &vfs.filesystems) + stateSourceObject.LoadValue(0, new([]*Mount), func(y interface{}) { vfs.loadMounts(y.([]*Mount)) }) +} + +func (p *PathOperation) StateTypeName() string { + return "pkg/sentry/vfs.PathOperation" +} + +func (p *PathOperation) StateFields() []string { + return []string{ + "Root", + "Start", + "Path", + "FollowFinalSymlink", + } +} + +func (p *PathOperation) beforeSave() {} + +// +checklocksignore +func (p *PathOperation) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.Root) + stateSinkObject.Save(1, &p.Start) + stateSinkObject.Save(2, &p.Path) + stateSinkObject.Save(3, &p.FollowFinalSymlink) +} + +func (p *PathOperation) afterLoad() {} + +// +checklocksignore +func (p *PathOperation) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.Root) + stateSourceObject.Load(1, &p.Start) + stateSourceObject.Load(2, &p.Path) + stateSourceObject.Load(3, &p.FollowFinalSymlink) +} + +func (vd *VirtualDentry) StateTypeName() string { + return "pkg/sentry/vfs.VirtualDentry" +} + +func (vd *VirtualDentry) StateFields() []string { + return []string{ + "mount", + "dentry", + } +} + +func (vd *VirtualDentry) beforeSave() {} + +// +checklocksignore +func (vd *VirtualDentry) StateSave(stateSinkObject state.Sink) { + vd.beforeSave() + stateSinkObject.Save(0, &vd.mount) + stateSinkObject.Save(1, &vd.dentry) +} + +func (vd *VirtualDentry) afterLoad() {} + +// +checklocksignore +func (vd *VirtualDentry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &vd.mount) + stateSourceObject.Load(1, &vd.dentry) +} + +func init() { + state.Register((*anonFilesystemType)(nil)) + state.Register((*anonFilesystem)(nil)) + state.Register((*anonDentry)(nil)) + state.Register((*Dentry)(nil)) + state.Register((*DeviceKind)(nil)) + state.Register((*devTuple)(nil)) + state.Register((*registeredDevice)(nil)) + state.Register((*RegisterDeviceOptions)(nil)) + state.Register((*EpollInstance)(nil)) + state.Register((*epollInterestKey)(nil)) + state.Register((*epollInterest)(nil)) + state.Register((*epollInterestList)(nil)) + state.Register((*epollInterestEntry)(nil)) + state.Register((*eventList)(nil)) + state.Register((*eventEntry)(nil)) + state.Register((*FileDescription)(nil)) + state.Register((*FileDescriptionOptions)(nil)) + state.Register((*Dirent)(nil)) + state.Register((*FileDescriptionDefaultImpl)(nil)) + state.Register((*DirectoryFileDescriptionDefaultImpl)(nil)) + state.Register((*DentryMetadataFileDescriptionImpl)(nil)) + state.Register((*StaticData)(nil)) + state.Register((*DynamicBytesFileDescriptionImpl)(nil)) + state.Register((*LockFD)(nil)) + state.Register((*NoLockFD)(nil)) + state.Register((*BadLockFD)(nil)) + state.Register((*FileDescriptionRefs)(nil)) + state.Register((*Filesystem)(nil)) + state.Register((*PrependPathAtVFSRootError)(nil)) + state.Register((*PrependPathAtNonMountRootError)(nil)) + state.Register((*PrependPathSyntheticError)(nil)) + state.Register((*FilesystemRefs)(nil)) + state.Register((*registeredFilesystemType)(nil)) + state.Register((*RegisterFilesystemTypeOptions)(nil)) + state.Register((*EventType)(nil)) + state.Register((*Inotify)(nil)) + state.Register((*Watches)(nil)) + state.Register((*Watch)(nil)) + state.Register((*Event)(nil)) + state.Register((*FileLocks)(nil)) + state.Register((*Mount)(nil)) + state.Register((*MountNamespace)(nil)) + state.Register((*umountRecursiveOptions)(nil)) + state.Register((*MountNamespaceRefs)(nil)) + state.Register((*opathFD)(nil)) + state.Register((*GetDentryOptions)(nil)) + state.Register((*MkdirOptions)(nil)) + state.Register((*MknodOptions)(nil)) + state.Register((*MountFlags)(nil)) + state.Register((*MountOptions)(nil)) + state.Register((*OpenOptions)(nil)) + state.Register((*ReadOptions)(nil)) + state.Register((*RenameOptions)(nil)) + state.Register((*SetStatOptions)(nil)) + state.Register((*BoundEndpointOptions)(nil)) + state.Register((*GetXattrOptions)(nil)) + state.Register((*SetXattrOptions)(nil)) + state.Register((*StatOptions)(nil)) + state.Register((*UmountOptions)(nil)) + state.Register((*WriteOptions)(nil)) + state.Register((*AccessTypes)(nil)) + state.Register((*ResolvingPath)(nil)) + state.Register((*resolveMountRootOrJumpError)(nil)) + state.Register((*resolveMountPointError)(nil)) + state.Register((*resolveAbsSymlinkError)(nil)) + state.Register((*VirtualFilesystem)(nil)) + state.Register((*PathOperation)(nil)) + state.Register((*VirtualDentry)(nil)) +} diff --git a/pkg/sentry/vfs/vfs_unsafe_state_autogen.go b/pkg/sentry/vfs/vfs_unsafe_state_autogen.go new file mode 100644 index 000000000..20f06c953 --- /dev/null +++ b/pkg/sentry/vfs/vfs_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package vfs diff --git a/pkg/sentry/watchdog/BUILD b/pkg/sentry/watchdog/BUILD deleted file mode 100644 index 1c5a1c9b6..000000000 --- a/pkg/sentry/watchdog/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "watchdog", - srcs = ["watchdog.go"], - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/log", - "//pkg/metric", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sync", - ], -) diff --git a/pkg/sentry/watchdog/watchdog_state_autogen.go b/pkg/sentry/watchdog/watchdog_state_autogen.go new file mode 100644 index 000000000..bce0200e7 --- /dev/null +++ b/pkg/sentry/watchdog/watchdog_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package watchdog |