diff options
Diffstat (limited to 'pkg/sentry')
352 files changed, 29338 insertions, 31236 deletions
diff --git a/pkg/sentry/BUILD b/pkg/sentry/BUILD deleted file mode 100644 index 53989301f..000000000 --- a/pkg/sentry/BUILD +++ /dev/null @@ -1,12 +0,0 @@ -# This BUILD file defines a package_group that allows for interdependencies for -# sentry-internal packages. - -package(licenses = ["notice"]) - -package_group( - name = "internal", - packages = [ - "//pkg/sentry/...", - "//runsc/...", - ], -) diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD deleted file mode 100644 index 7aace2d7b..000000000 --- a/pkg/sentry/arch/BUILD +++ /dev/null @@ -1,50 +0,0 @@ -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") - -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "arch", - srcs = [ - "aligned.go", - "arch.go", - "arch_amd64.go", - "arch_amd64.s", - "arch_state_x86.go", - "arch_x86.go", - "auxv.go", - "signal_act.go", - "signal_amd64.go", - "signal_info.go", - "signal_stack.go", - "stack.go", - "syscalls_amd64.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/arch", - visibility = ["//:sandbox"], - deps = [ - ":registers_go_proto", - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/cpuid", - "//pkg/log", - "//pkg/sentry/context", - "//pkg/sentry/limits", - "//pkg/sentry/usermem", - "//pkg/syserror", - ], -) - -proto_library( - name = "registers_proto", - srcs = ["registers.proto"], - visibility = ["//visibility:public"], -) - -go_proto_library( - name = "registers_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto", - proto = ":registers_proto", - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/arch/arch_state_autogen.go b/pkg/sentry/arch/arch_state_autogen.go new file mode 100755 index 000000000..9b1205531 --- /dev/null +++ b/pkg/sentry/arch/arch_state_autogen.go @@ -0,0 +1,193 @@ +// automatically generated by stateify. + +package arch + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *MmapLayout) beforeSave() {} +func (x *MmapLayout) save(m state.Map) { + x.beforeSave() + m.Save("MinAddr", &x.MinAddr) + m.Save("MaxAddr", &x.MaxAddr) + m.Save("BottomUpBase", &x.BottomUpBase) + m.Save("TopDownBase", &x.TopDownBase) + m.Save("DefaultDirection", &x.DefaultDirection) + m.Save("MaxStackRand", &x.MaxStackRand) +} + +func (x *MmapLayout) afterLoad() {} +func (x *MmapLayout) load(m state.Map) { + m.Load("MinAddr", &x.MinAddr) + m.Load("MaxAddr", &x.MaxAddr) + m.Load("BottomUpBase", &x.BottomUpBase) + m.Load("TopDownBase", &x.TopDownBase) + m.Load("DefaultDirection", &x.DefaultDirection) + m.Load("MaxStackRand", &x.MaxStackRand) +} + +func (x *context64) beforeSave() {} +func (x *context64) save(m state.Map) { + x.beforeSave() + m.Save("State", &x.State) + m.Save("sigFPState", &x.sigFPState) +} + +func (x *context64) afterLoad() {} +func (x *context64) load(m state.Map) { + m.Load("State", &x.State) + m.Load("sigFPState", &x.sigFPState) +} + +func (x *syscallPtraceRegs) beforeSave() {} +func (x *syscallPtraceRegs) save(m state.Map) { + x.beforeSave() + m.Save("R15", &x.R15) + m.Save("R14", &x.R14) + m.Save("R13", &x.R13) + m.Save("R12", &x.R12) + m.Save("Rbp", &x.Rbp) + m.Save("Rbx", &x.Rbx) + m.Save("R11", &x.R11) + m.Save("R10", &x.R10) + m.Save("R9", &x.R9) + m.Save("R8", &x.R8) + m.Save("Rax", &x.Rax) + m.Save("Rcx", &x.Rcx) + m.Save("Rdx", &x.Rdx) + m.Save("Rsi", &x.Rsi) + m.Save("Rdi", &x.Rdi) + m.Save("Orig_rax", &x.Orig_rax) + m.Save("Rip", &x.Rip) + m.Save("Cs", &x.Cs) + m.Save("Eflags", &x.Eflags) + m.Save("Rsp", &x.Rsp) + m.Save("Ss", &x.Ss) + m.Save("Fs_base", &x.Fs_base) + m.Save("Gs_base", &x.Gs_base) + m.Save("Ds", &x.Ds) + m.Save("Es", &x.Es) + m.Save("Fs", &x.Fs) + m.Save("Gs", &x.Gs) +} + +func (x *syscallPtraceRegs) afterLoad() {} +func (x *syscallPtraceRegs) load(m state.Map) { + m.Load("R15", &x.R15) + m.Load("R14", &x.R14) + m.Load("R13", &x.R13) + m.Load("R12", &x.R12) + m.Load("Rbp", &x.Rbp) + m.Load("Rbx", &x.Rbx) + m.Load("R11", &x.R11) + m.Load("R10", &x.R10) + m.Load("R9", &x.R9) + m.Load("R8", &x.R8) + m.Load("Rax", &x.Rax) + m.Load("Rcx", &x.Rcx) + m.Load("Rdx", &x.Rdx) + m.Load("Rsi", &x.Rsi) + m.Load("Rdi", &x.Rdi) + m.Load("Orig_rax", &x.Orig_rax) + m.Load("Rip", &x.Rip) + m.Load("Cs", &x.Cs) + m.Load("Eflags", &x.Eflags) + m.Load("Rsp", &x.Rsp) + m.Load("Ss", &x.Ss) + m.Load("Fs_base", &x.Fs_base) + m.Load("Gs_base", &x.Gs_base) + m.Load("Ds", &x.Ds) + m.Load("Es", &x.Es) + m.Load("Fs", &x.Fs) + m.Load("Gs", &x.Gs) +} + +func (x *State) beforeSave() {} +func (x *State) save(m state.Map) { + x.beforeSave() + var Regs syscallPtraceRegs = x.saveRegs() + m.SaveValue("Regs", Regs) + m.Save("x86FPState", &x.x86FPState) + m.Save("FeatureSet", &x.FeatureSet) +} + +func (x *State) load(m state.Map) { + m.LoadWait("x86FPState", &x.x86FPState) + m.Load("FeatureSet", &x.FeatureSet) + m.LoadValue("Regs", new(syscallPtraceRegs), func(y interface{}) { x.loadRegs(y.(syscallPtraceRegs)) }) + m.AfterLoad(x.afterLoad) +} + +func (x *AuxEntry) beforeSave() {} +func (x *AuxEntry) save(m state.Map) { + x.beforeSave() + m.Save("Key", &x.Key) + m.Save("Value", &x.Value) +} + +func (x *AuxEntry) afterLoad() {} +func (x *AuxEntry) load(m state.Map) { + m.Load("Key", &x.Key) + m.Load("Value", &x.Value) +} + +func (x *SignalAct) beforeSave() {} +func (x *SignalAct) save(m state.Map) { + x.beforeSave() + m.Save("Handler", &x.Handler) + m.Save("Flags", &x.Flags) + m.Save("Restorer", &x.Restorer) + m.Save("Mask", &x.Mask) +} + +func (x *SignalAct) afterLoad() {} +func (x *SignalAct) load(m state.Map) { + m.Load("Handler", &x.Handler) + m.Load("Flags", &x.Flags) + m.Load("Restorer", &x.Restorer) + m.Load("Mask", &x.Mask) +} + +func (x *SignalStack) beforeSave() {} +func (x *SignalStack) save(m state.Map) { + x.beforeSave() + m.Save("Addr", &x.Addr) + m.Save("Flags", &x.Flags) + m.Save("Size", &x.Size) +} + +func (x *SignalStack) afterLoad() {} +func (x *SignalStack) load(m state.Map) { + m.Load("Addr", &x.Addr) + m.Load("Flags", &x.Flags) + m.Load("Size", &x.Size) +} + +func (x *SignalInfo) beforeSave() {} +func (x *SignalInfo) save(m state.Map) { + x.beforeSave() + m.Save("Signo", &x.Signo) + m.Save("Errno", &x.Errno) + m.Save("Code", &x.Code) + m.Save("Fields", &x.Fields) +} + +func (x *SignalInfo) afterLoad() {} +func (x *SignalInfo) load(m state.Map) { + m.Load("Signo", &x.Signo) + m.Load("Errno", &x.Errno) + m.Load("Code", &x.Code) + m.Load("Fields", &x.Fields) +} + +func init() { + state.Register("arch.MmapLayout", (*MmapLayout)(nil), state.Fns{Save: (*MmapLayout).save, Load: (*MmapLayout).load}) + state.Register("arch.context64", (*context64)(nil), state.Fns{Save: (*context64).save, Load: (*context64).load}) + state.Register("arch.syscallPtraceRegs", (*syscallPtraceRegs)(nil), state.Fns{Save: (*syscallPtraceRegs).save, Load: (*syscallPtraceRegs).load}) + state.Register("arch.State", (*State)(nil), state.Fns{Save: (*State).save, Load: (*State).load}) + state.Register("arch.AuxEntry", (*AuxEntry)(nil), state.Fns{Save: (*AuxEntry).save, Load: (*AuxEntry).load}) + state.Register("arch.SignalAct", (*SignalAct)(nil), state.Fns{Save: (*SignalAct).save, Load: (*SignalAct).load}) + state.Register("arch.SignalStack", (*SignalStack)(nil), state.Fns{Save: (*SignalStack).save, Load: (*SignalStack).load}) + state.Register("arch.SignalInfo", (*SignalInfo)(nil), state.Fns{Save: (*SignalInfo).save, Load: (*SignalInfo).load}) +} diff --git a/pkg/sentry/arch/registers.proto b/pkg/sentry/arch/registers.proto deleted file mode 100644 index 9dc83e241..000000000 --- a/pkg/sentry/arch/registers.proto +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -message AMD64Registers { - uint64 rax = 1; - uint64 rbx = 2; - uint64 rcx = 3; - uint64 rdx = 4; - uint64 rsi = 5; - uint64 rdi = 6; - uint64 rsp = 7; - uint64 rbp = 8; - - uint64 r8 = 9; - uint64 r9 = 10; - uint64 r10 = 11; - uint64 r11 = 12; - uint64 r12 = 13; - uint64 r13 = 14; - uint64 r14 = 15; - uint64 r15 = 16; - - uint64 rip = 17; - uint64 rflags = 18; - uint64 orig_rax = 19; - uint64 cs = 20; - uint64 ds = 21; - uint64 es = 22; - uint64 fs = 23; - uint64 gs = 24; - uint64 ss = 25; - uint64 fs_base = 26; - uint64 gs_base = 27; -} - -message Registers { - oneof arch { - AMD64Registers amd64 = 1; - } -} diff --git a/pkg/sentry/arch/registers_go_proto/registers.pb.go b/pkg/sentry/arch/registers_go_proto/registers.pb.go new file mode 100755 index 000000000..088209be7 --- /dev/null +++ b/pkg/sentry/arch/registers_go_proto/registers.pb.go @@ -0,0 +1,367 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: pkg/sentry/arch/registers.proto + +package gvisor + +import ( + fmt "fmt" + proto "github.com/golang/protobuf/proto" + math "math" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package + +type AMD64Registers struct { + Rax uint64 `protobuf:"varint,1,opt,name=rax,proto3" json:"rax,omitempty"` + Rbx uint64 `protobuf:"varint,2,opt,name=rbx,proto3" json:"rbx,omitempty"` + Rcx uint64 `protobuf:"varint,3,opt,name=rcx,proto3" json:"rcx,omitempty"` + Rdx uint64 `protobuf:"varint,4,opt,name=rdx,proto3" json:"rdx,omitempty"` + Rsi uint64 `protobuf:"varint,5,opt,name=rsi,proto3" json:"rsi,omitempty"` + Rdi uint64 `protobuf:"varint,6,opt,name=rdi,proto3" json:"rdi,omitempty"` + Rsp uint64 `protobuf:"varint,7,opt,name=rsp,proto3" json:"rsp,omitempty"` + Rbp uint64 `protobuf:"varint,8,opt,name=rbp,proto3" json:"rbp,omitempty"` + R8 uint64 `protobuf:"varint,9,opt,name=r8,proto3" json:"r8,omitempty"` + R9 uint64 `protobuf:"varint,10,opt,name=r9,proto3" json:"r9,omitempty"` + R10 uint64 `protobuf:"varint,11,opt,name=r10,proto3" json:"r10,omitempty"` + R11 uint64 `protobuf:"varint,12,opt,name=r11,proto3" json:"r11,omitempty"` + R12 uint64 `protobuf:"varint,13,opt,name=r12,proto3" json:"r12,omitempty"` + R13 uint64 `protobuf:"varint,14,opt,name=r13,proto3" json:"r13,omitempty"` + R14 uint64 `protobuf:"varint,15,opt,name=r14,proto3" json:"r14,omitempty"` + R15 uint64 `protobuf:"varint,16,opt,name=r15,proto3" json:"r15,omitempty"` + Rip uint64 `protobuf:"varint,17,opt,name=rip,proto3" json:"rip,omitempty"` + Rflags uint64 `protobuf:"varint,18,opt,name=rflags,proto3" json:"rflags,omitempty"` + OrigRax uint64 `protobuf:"varint,19,opt,name=orig_rax,json=origRax,proto3" json:"orig_rax,omitempty"` + Cs uint64 `protobuf:"varint,20,opt,name=cs,proto3" json:"cs,omitempty"` + Ds uint64 `protobuf:"varint,21,opt,name=ds,proto3" json:"ds,omitempty"` + Es uint64 `protobuf:"varint,22,opt,name=es,proto3" json:"es,omitempty"` + Fs uint64 `protobuf:"varint,23,opt,name=fs,proto3" json:"fs,omitempty"` + Gs uint64 `protobuf:"varint,24,opt,name=gs,proto3" json:"gs,omitempty"` + Ss uint64 `protobuf:"varint,25,opt,name=ss,proto3" json:"ss,omitempty"` + FsBase uint64 `protobuf:"varint,26,opt,name=fs_base,json=fsBase,proto3" json:"fs_base,omitempty"` + GsBase uint64 `protobuf:"varint,27,opt,name=gs_base,json=gsBase,proto3" json:"gs_base,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *AMD64Registers) Reset() { *m = AMD64Registers{} } +func (m *AMD64Registers) String() string { return proto.CompactTextString(m) } +func (*AMD64Registers) ProtoMessage() {} +func (*AMD64Registers) Descriptor() ([]byte, []int) { + return fileDescriptor_082b7510610e0457, []int{0} +} + +func (m *AMD64Registers) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_AMD64Registers.Unmarshal(m, b) +} +func (m *AMD64Registers) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_AMD64Registers.Marshal(b, m, deterministic) +} +func (m *AMD64Registers) XXX_Merge(src proto.Message) { + xxx_messageInfo_AMD64Registers.Merge(m, src) +} +func (m *AMD64Registers) XXX_Size() int { + return xxx_messageInfo_AMD64Registers.Size(m) +} +func (m *AMD64Registers) XXX_DiscardUnknown() { + xxx_messageInfo_AMD64Registers.DiscardUnknown(m) +} + +var xxx_messageInfo_AMD64Registers proto.InternalMessageInfo + +func (m *AMD64Registers) GetRax() uint64 { + if m != nil { + return m.Rax + } + return 0 +} + +func (m *AMD64Registers) GetRbx() uint64 { + if m != nil { + return m.Rbx + } + return 0 +} + +func (m *AMD64Registers) GetRcx() uint64 { + if m != nil { + return m.Rcx + } + return 0 +} + +func (m *AMD64Registers) GetRdx() uint64 { + if m != nil { + return m.Rdx + } + return 0 +} + +func (m *AMD64Registers) GetRsi() uint64 { + if m != nil { + return m.Rsi + } + return 0 +} + +func (m *AMD64Registers) GetRdi() uint64 { + if m != nil { + return m.Rdi + } + return 0 +} + +func (m *AMD64Registers) GetRsp() uint64 { + if m != nil { + return m.Rsp + } + return 0 +} + +func (m *AMD64Registers) GetRbp() uint64 { + if m != nil { + return m.Rbp + } + return 0 +} + +func (m *AMD64Registers) GetR8() uint64 { + if m != nil { + return m.R8 + } + return 0 +} + +func (m *AMD64Registers) GetR9() uint64 { + if m != nil { + return m.R9 + } + return 0 +} + +func (m *AMD64Registers) GetR10() uint64 { + if m != nil { + return m.R10 + } + return 0 +} + +func (m *AMD64Registers) GetR11() uint64 { + if m != nil { + return m.R11 + } + return 0 +} + +func (m *AMD64Registers) GetR12() uint64 { + if m != nil { + return m.R12 + } + return 0 +} + +func (m *AMD64Registers) GetR13() uint64 { + if m != nil { + return m.R13 + } + return 0 +} + +func (m *AMD64Registers) GetR14() uint64 { + if m != nil { + return m.R14 + } + return 0 +} + +func (m *AMD64Registers) GetR15() uint64 { + if m != nil { + return m.R15 + } + return 0 +} + +func (m *AMD64Registers) GetRip() uint64 { + if m != nil { + return m.Rip + } + return 0 +} + +func (m *AMD64Registers) GetRflags() uint64 { + if m != nil { + return m.Rflags + } + return 0 +} + +func (m *AMD64Registers) GetOrigRax() uint64 { + if m != nil { + return m.OrigRax + } + return 0 +} + +func (m *AMD64Registers) GetCs() uint64 { + if m != nil { + return m.Cs + } + return 0 +} + +func (m *AMD64Registers) GetDs() uint64 { + if m != nil { + return m.Ds + } + return 0 +} + +func (m *AMD64Registers) GetEs() uint64 { + if m != nil { + return m.Es + } + return 0 +} + +func (m *AMD64Registers) GetFs() uint64 { + if m != nil { + return m.Fs + } + return 0 +} + +func (m *AMD64Registers) GetGs() uint64 { + if m != nil { + return m.Gs + } + return 0 +} + +func (m *AMD64Registers) GetSs() uint64 { + if m != nil { + return m.Ss + } + return 0 +} + +func (m *AMD64Registers) GetFsBase() uint64 { + if m != nil { + return m.FsBase + } + return 0 +} + +func (m *AMD64Registers) GetGsBase() uint64 { + if m != nil { + return m.GsBase + } + return 0 +} + +type Registers struct { + // Types that are valid to be assigned to Arch: + // *Registers_Amd64 + Arch isRegisters_Arch `protobuf_oneof:"arch"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *Registers) Reset() { *m = Registers{} } +func (m *Registers) String() string { return proto.CompactTextString(m) } +func (*Registers) ProtoMessage() {} +func (*Registers) Descriptor() ([]byte, []int) { + return fileDescriptor_082b7510610e0457, []int{1} +} + +func (m *Registers) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_Registers.Unmarshal(m, b) +} +func (m *Registers) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_Registers.Marshal(b, m, deterministic) +} +func (m *Registers) XXX_Merge(src proto.Message) { + xxx_messageInfo_Registers.Merge(m, src) +} +func (m *Registers) XXX_Size() int { + return xxx_messageInfo_Registers.Size(m) +} +func (m *Registers) XXX_DiscardUnknown() { + xxx_messageInfo_Registers.DiscardUnknown(m) +} + +var xxx_messageInfo_Registers proto.InternalMessageInfo + +type isRegisters_Arch interface { + isRegisters_Arch() +} + +type Registers_Amd64 struct { + Amd64 *AMD64Registers `protobuf:"bytes,1,opt,name=amd64,proto3,oneof"` +} + +func (*Registers_Amd64) isRegisters_Arch() {} + +func (m *Registers) GetArch() isRegisters_Arch { + if m != nil { + return m.Arch + } + return nil +} + +func (m *Registers) GetAmd64() *AMD64Registers { + if x, ok := m.GetArch().(*Registers_Amd64); ok { + return x.Amd64 + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*Registers) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*Registers_Amd64)(nil), + } +} + +func init() { + proto.RegisterType((*AMD64Registers)(nil), "gvisor.AMD64Registers") + proto.RegisterType((*Registers)(nil), "gvisor.Registers") +} + +func init() { proto.RegisterFile("pkg/sentry/arch/registers.proto", fileDescriptor_082b7510610e0457) } + +var fileDescriptor_082b7510610e0457 = []byte{ + // 354 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x54, 0x92, 0x4d, 0x6f, 0xe2, 0x30, + 0x10, 0x86, 0x17, 0x08, 0x01, 0xcc, 0x2e, 0xcb, 0x66, 0x5b, 0x18, 0xda, 0x43, 0x2b, 0x4e, 0x3d, + 0x05, 0x02, 0x01, 0xc1, 0xb1, 0xb4, 0x87, 0x5e, 0x7a, 0xc9, 0x1f, 0x40, 0xf9, 0x74, 0xad, 0x7e, + 0xc4, 0xf2, 0xa0, 0x2a, 0x3d, 0xf7, 0x8f, 0x57, 0xf6, 0xd8, 0xaa, 0x7a, 0xcb, 0xf3, 0xcc, 0x6b, + 0x39, 0x93, 0xbc, 0xec, 0x4a, 0x3e, 0xf3, 0x05, 0x96, 0x6f, 0x27, 0xf5, 0xb1, 0x48, 0x55, 0xfe, + 0xb4, 0x50, 0x25, 0x17, 0x78, 0x2a, 0x15, 0x86, 0x52, 0xd5, 0xa7, 0x3a, 0xf0, 0xf9, 0xbb, 0xc0, + 0x5a, 0xcd, 0x3f, 0x3d, 0x36, 0xba, 0x7d, 0xbc, 0xdf, 0xc6, 0x89, 0x0b, 0x04, 0x63, 0xd6, 0x51, + 0x69, 0x03, 0xad, 0xeb, 0xd6, 0x8d, 0x97, 0xe8, 0x47, 0x63, 0xb2, 0x06, 0xda, 0xd6, 0x64, 0x64, + 0xf2, 0x06, 0x3a, 0xd6, 0xe4, 0x64, 0x8a, 0x06, 0x3c, 0x6b, 0x0a, 0x32, 0x28, 0xa0, 0x6b, 0x0d, + 0x0a, 0xca, 0x08, 0xf0, 0x5d, 0x86, 0x0c, 0x4a, 0xe8, 0xb9, 0x8c, 0xa4, 0xbb, 0x24, 0xf4, 0xdd, + 0x5d, 0x32, 0x18, 0xb1, 0xb6, 0xda, 0xc1, 0xc0, 0x88, 0xb6, 0xda, 0x19, 0xde, 0x03, 0xb3, 0xbc, + 0x37, 0x27, 0xa2, 0x25, 0x0c, 0xed, 0x89, 0x68, 0x49, 0x26, 0x82, 0xdf, 0xce, 0x44, 0x64, 0x56, + 0xf0, 0xc7, 0x99, 0x15, 0x99, 0x35, 0x8c, 0x9c, 0x59, 0x93, 0x89, 0xe1, 0xaf, 0x33, 0x31, 0x99, + 0x0d, 0x8c, 0x9d, 0xd9, 0x18, 0x23, 0x24, 0xfc, 0xb3, 0x46, 0xc8, 0x60, 0xc2, 0x7c, 0x55, 0xbd, + 0xa4, 0x1c, 0x21, 0x30, 0xd2, 0x52, 0x30, 0x63, 0xfd, 0x5a, 0x09, 0x7e, 0xd4, 0x9f, 0xf2, 0xbf, + 0x99, 0xf4, 0x34, 0x27, 0x69, 0xa3, 0x17, 0xc8, 0x11, 0xce, 0x68, 0x81, 0x1c, 0x35, 0x17, 0x08, + 0xe7, 0xc4, 0x85, 0xe1, 0x12, 0x61, 0x42, 0x5c, 0x1a, 0xae, 0x10, 0xa6, 0xc4, 0x95, 0x61, 0x8e, + 0x00, 0xc4, 0xdc, 0x30, 0x22, 0xcc, 0x88, 0x11, 0x83, 0x29, 0xeb, 0x55, 0x78, 0xcc, 0x52, 0x2c, + 0xe1, 0x82, 0xde, 0xa9, 0xc2, 0x43, 0x8a, 0xa5, 0x1e, 0x70, 0x3b, 0xb8, 0xa4, 0x01, 0x37, 0x83, + 0xf9, 0x1d, 0x1b, 0x7c, 0xff, 0xff, 0x90, 0x75, 0xd3, 0xd7, 0x62, 0x1b, 0x9b, 0x06, 0x0c, 0x57, + 0x93, 0x90, 0xaa, 0x12, 0xfe, 0xac, 0xc9, 0xc3, 0xaf, 0x84, 0x62, 0x07, 0x9f, 0x79, 0xba, 0x62, + 0x99, 0x6f, 0x9a, 0xb5, 0xfe, 0x0a, 0x00, 0x00, 0xff, 0xff, 0xb4, 0xcc, 0x03, 0x27, 0x7c, 0x02, + 0x00, 0x00, +} diff --git a/pkg/sentry/context/BUILD b/pkg/sentry/context/BUILD deleted file mode 100644 index 8dc1a77b1..000000000 --- a/pkg/sentry/context/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "context", - srcs = ["context.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/context", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/amutex", - "//pkg/log", - ], -) diff --git a/pkg/sentry/context/context_state_autogen.go b/pkg/sentry/context/context_state_autogen.go new file mode 100755 index 000000000..7dd55bfea --- /dev/null +++ b/pkg/sentry/context/context_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package context + diff --git a/pkg/sentry/context/contexttest/BUILD b/pkg/sentry/context/contexttest/BUILD deleted file mode 100644 index 3b6841b7e..000000000 --- a/pkg/sentry/context/contexttest/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "contexttest", - testonly = 1, - srcs = ["contexttest.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/context/contexttest", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/memutil", - "//pkg/sentry/context", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/platform/ptrace", - "//pkg/sentry/uniqueid", - ], -) diff --git a/pkg/sentry/context/contexttest/contexttest.go b/pkg/sentry/context/contexttest/contexttest.go deleted file mode 100644 index 15cf086a9..000000000 --- a/pkg/sentry/context/contexttest/contexttest.go +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package contexttest builds a test context.Context. -package contexttest - -import ( - "os" - "sync/atomic" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/memutil" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" - "gvisor.dev/gvisor/pkg/sentry/uniqueid" -) - -// Context returns a Context that may be used in tests. Uses ptrace as the -// platform.Platform. -// -// Note that some filesystems may require a minimal kernel for testing, which -// this test context does not provide. For such tests, see kernel/contexttest. -func Context(tb testing.TB) context.Context { - const memfileName = "contexttest-memory" - memfd, err := memutil.CreateMemFD(memfileName, 0) - if err != nil { - tb.Fatalf("error creating application memory file: %v", err) - } - memfile := os.NewFile(uintptr(memfd), memfileName) - mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{}) - if err != nil { - memfile.Close() - tb.Fatalf("error creating pgalloc.MemoryFile: %v", err) - } - p, err := ptrace.New() - if err != nil { - tb.Fatal(err) - } - // Test usage of context.Background is fine. - return &TestContext{ - Context: context.Background(), - l: limits.NewLimitSet(), - mf: mf, - platform: p, - creds: auth.NewAnonymousCredentials(), - otherValues: make(map[interface{}]interface{}), - } -} - -// TestContext represents a context with minimal functionality suitable for -// running tests. -type TestContext struct { - context.Context - l *limits.LimitSet - mf *pgalloc.MemoryFile - platform platform.Platform - creds *auth.Credentials - otherValues map[interface{}]interface{} -} - -// globalUniqueID tracks incremental unique identifiers for tests. -var globalUniqueID uint64 - -// globalUniqueIDProvider implements unix.UniqueIDProvider. -type globalUniqueIDProvider struct{} - -// UniqueID implements unix.UniqueIDProvider.UniqueID. -func (*globalUniqueIDProvider) UniqueID() uint64 { - return atomic.AddUint64(&globalUniqueID, 1) -} - -// lastInotifyCookie is a monotonically increasing counter for generating unique -// inotify cookies. Must be accessed using atomic ops. -var lastInotifyCookie uint32 - -// hostClock implements ktime.Clock. -type hostClock struct { - ktime.WallRateClock - ktime.NoClockEvents -} - -// Now implements ktime.Clock.Now. -func (hostClock) Now() ktime.Time { - return ktime.FromNanoseconds(time.Now().UnixNano()) -} - -// RegisterValue registers additional values with this test context. Useful for -// providing values from external packages that contexttest can't depend on. -func (t *TestContext) RegisterValue(key, value interface{}) { - t.otherValues[key] = value -} - -// Value implements context.Context. -func (t *TestContext) Value(key interface{}) interface{} { - switch key { - case auth.CtxCredentials: - return t.creds - case limits.CtxLimits: - return t.l - case pgalloc.CtxMemoryFile: - return t.mf - case pgalloc.CtxMemoryFileProvider: - return t - case platform.CtxPlatform: - return t.platform - case uniqueid.CtxGlobalUniqueID: - return (*globalUniqueIDProvider).UniqueID(nil) - case uniqueid.CtxGlobalUniqueIDProvider: - return &globalUniqueIDProvider{} - case uniqueid.CtxInotifyCookie: - return atomic.AddUint32(&lastInotifyCookie, 1) - case ktime.CtxRealtimeClock: - return hostClock{} - default: - if val, ok := t.otherValues[key]; ok { - return val - } - return t.Context.Value(key) - } -} - -// MemoryFile implements pgalloc.MemoryFileProvider.MemoryFile. -func (t *TestContext) MemoryFile() *pgalloc.MemoryFile { - return t.mf -} - -// RootContext returns a Context that may be used in tests that need root -// credentials. Uses ptrace as the platform.Platform. -func RootContext(tb testing.TB) context.Context { - return WithCreds(Context(tb), auth.NewRootCredentials(auth.NewRootUserNamespace())) -} - -// WithCreds returns a copy of ctx carrying creds. -func WithCreds(ctx context.Context, creds *auth.Credentials) context.Context { - return &authContext{ctx, creds} -} - -type authContext struct { - context.Context - creds *auth.Credentials -} - -// Value implements context.Context. -func (ac *authContext) Value(key interface{}) interface{} { - switch key { - case auth.CtxCredentials: - return ac.creds - default: - return ac.Context.Value(key) - } -} - -// WithLimitSet returns a copy of ctx carrying l. -func WithLimitSet(ctx context.Context, l *limits.LimitSet) context.Context { - return limitContext{ctx, l} -} - -type limitContext struct { - context.Context - l *limits.LimitSet -} - -// Value implements context.Context. -func (lc limitContext) Value(key interface{}) interface{} { - switch key { - case limits.CtxLimits: - return lc.l - default: - return lc.Context.Value(key) - } -} diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD deleted file mode 100644 index bf802d1b6..000000000 --- a/pkg/sentry/control/BUILD +++ /dev/null @@ -1,47 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "control", - srcs = [ - "control.go", - "logging.go", - "pprof.go", - "proc.go", - "state.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/control", - visibility = [ - "//pkg/sentry:internal", - ], - deps = [ - "//pkg/abi/linux", - "//pkg/fd", - "//pkg/log", - "//pkg/sentry/fs", - "//pkg/sentry/fs/host", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/state", - "//pkg/sentry/strace", - "//pkg/sentry/usage", - "//pkg/sentry/watchdog", - "//pkg/tcpip/link/sniffer", - "//pkg/urpc", - ], -) - -go_test( - name = "control_test", - size = "small", - srcs = ["proc_test.go"], - embed = [":control"], - deps = [ - "//pkg/log", - "//pkg/sentry/kernel/time", - "//pkg/sentry/usage", - ], -) diff --git a/pkg/sentry/control/control_state_autogen.go b/pkg/sentry/control/control_state_autogen.go new file mode 100755 index 000000000..a1de4bc6d --- /dev/null +++ b/pkg/sentry/control/control_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package control + diff --git a/pkg/sentry/control/proc_test.go b/pkg/sentry/control/proc_test.go deleted file mode 100644 index d8ada2694..000000000 --- a/pkg/sentry/control/proc_test.go +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package control - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/log" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/usage" -) - -func init() { - log.SetLevel(log.Debug) -} - -// Tests that ProcessData.Table() prints with the correct format. -func TestProcessListTable(t *testing.T) { - testCases := []struct { - pl []*Process - expected string - }{ - { - pl: []*Process{}, - expected: "UID PID PPID C STIME TIME CMD", - }, - { - pl: []*Process{ - { - UID: 0, - PID: 0, - PPID: 0, - C: 0, - STime: "0", - Time: "0", - Cmd: "zero", - }, - { - UID: 1, - PID: 1, - PPID: 1, - C: 1, - STime: "1", - Time: "1", - Cmd: "one", - }, - }, - expected: `UID PID PPID C STIME TIME CMD -0 0 0 0 0 0 zero -1 1 1 1 1 1 one`, - }, - } - - for _, tc := range testCases { - output := ProcessListToTable(tc.pl) - - if tc.expected != output { - t.Errorf("PrintTable(%v): got:\n%s\nwant:\n%s", tc.pl, output, tc.expected) - } - } -} - -func TestProcessListJSON(t *testing.T) { - testCases := []struct { - pl []*Process - expected string - }{ - { - pl: []*Process{}, - expected: "[]", - }, - { - pl: []*Process{ - { - UID: 0, - PID: 0, - PPID: 0, - C: 0, - STime: "0", - Time: "0", - Cmd: "zero", - }, - { - UID: 1, - PID: 1, - PPID: 1, - C: 1, - STime: "1", - Time: "1", - Cmd: "one", - }, - }, - expected: "[0,1]", - }, - } - - for _, tc := range testCases { - output, err := PrintPIDsJSON(tc.pl) - if err != nil { - t.Errorf("failed to generate JSON: %v", err) - } - - if tc.expected != output { - t.Errorf("PrintJSON(%v): got:\n%s\nwant:\n%s", tc.pl, output, tc.expected) - } - } -} - -func TestPercentCPU(t *testing.T) { - testCases := []struct { - stats usage.CPUStats - startTime ktime.Time - now ktime.Time - expected int32 - }{ - { - // Verify that 100% use is capped at 99. - stats: usage.CPUStats{UserTime: 1e9, SysTime: 1e9}, - startTime: ktime.FromNanoseconds(7e9), - now: ktime.FromNanoseconds(9e9), - expected: 99, - }, - { - // Verify that if usage > lifetime, we get at most 99% - // usage. - stats: usage.CPUStats{UserTime: 2e9, SysTime: 2e9}, - startTime: ktime.FromNanoseconds(7e9), - now: ktime.FromNanoseconds(9e9), - expected: 99, - }, - { - // Verify that 50% usage is reported correctly. - stats: usage.CPUStats{UserTime: 1e9, SysTime: 1e9}, - startTime: ktime.FromNanoseconds(12e9), - now: ktime.FromNanoseconds(16e9), - expected: 50, - }, - { - // Verify that 0% usage is reported correctly. - stats: usage.CPUStats{UserTime: 0, SysTime: 0}, - startTime: ktime.FromNanoseconds(12e9), - now: ktime.FromNanoseconds(14e9), - expected: 0, - }, - } - - for _, tc := range testCases { - if pcpu := percentCPU(tc.stats, tc.startTime, tc.now); pcpu != tc.expected { - t.Errorf("percentCPU(%v, %v, %v): got %d, want %d", tc.stats, tc.startTime, tc.now, pcpu, tc.expected) - } - } -} diff --git a/pkg/sentry/device/BUILD b/pkg/sentry/device/BUILD deleted file mode 100644 index 7e8918722..000000000 --- a/pkg/sentry/device/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "device", - srcs = ["device.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/device", - visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/abi/linux"], -) - -go_test( - name = "device_test", - size = "small", - srcs = ["device_test.go"], - embed = [":device"], -) diff --git a/pkg/sentry/device/device_state_autogen.go b/pkg/sentry/device/device_state_autogen.go new file mode 100755 index 000000000..be40f6c77 --- /dev/null +++ b/pkg/sentry/device/device_state_autogen.go @@ -0,0 +1,52 @@ +// automatically generated by stateify. + +package device + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *Registry) beforeSave() {} +func (x *Registry) save(m state.Map) { + x.beforeSave() + m.Save("lastAnonDeviceMinor", &x.lastAnonDeviceMinor) + m.Save("devices", &x.devices) +} + +func (x *Registry) afterLoad() {} +func (x *Registry) load(m state.Map) { + m.Load("lastAnonDeviceMinor", &x.lastAnonDeviceMinor) + m.Load("devices", &x.devices) +} + +func (x *ID) beforeSave() {} +func (x *ID) save(m state.Map) { + x.beforeSave() + m.Save("Major", &x.Major) + m.Save("Minor", &x.Minor) +} + +func (x *ID) afterLoad() {} +func (x *ID) load(m state.Map) { + m.Load("Major", &x.Major) + m.Load("Minor", &x.Minor) +} + +func (x *Device) beforeSave() {} +func (x *Device) save(m state.Map) { + x.beforeSave() + m.Save("ID", &x.ID) + m.Save("last", &x.last) +} + +func (x *Device) afterLoad() {} +func (x *Device) load(m state.Map) { + m.Load("ID", &x.ID) + m.Load("last", &x.last) +} + +func init() { + state.Register("device.Registry", (*Registry)(nil), state.Fns{Save: (*Registry).save, Load: (*Registry).load}) + state.Register("device.ID", (*ID)(nil), state.Fns{Save: (*ID).save, Load: (*ID).load}) + state.Register("device.Device", (*Device)(nil), state.Fns{Save: (*Device).save, Load: (*Device).load}) +} diff --git a/pkg/sentry/device/device_test.go b/pkg/sentry/device/device_test.go deleted file mode 100644 index e3f51ce4f..000000000 --- a/pkg/sentry/device/device_test.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package device - -import ( - "testing" -) - -func TestMultiDevice(t *testing.T) { - device := &MultiDevice{} - - // Check that Load fails to install virtual inodes that are - // uninitialized. - if device.Load(MultiDeviceKey{}, 0) { - t.Fatalf("got load of invalid virtual inode 0, want unsuccessful") - } - - inode := device.Map(MultiDeviceKey{}) - - // Assert that the same raw device and inode map to - // a consistent virtual inode. - if i := device.Map(MultiDeviceKey{}); i != inode { - t.Fatalf("got inode %d, want %d in %s", i, inode, device) - } - - // Assert that a new inode or new device does not conflict. - if i := device.Map(MultiDeviceKey{Device: 0, Inode: 1}); i == inode { - t.Fatalf("got reused inode %d, want new distinct inode in %s", i, device) - } - last := device.Map(MultiDeviceKey{Device: 1, Inode: 0}) - if last == inode { - t.Fatalf("got reused inode %d, want new distinct inode in %s", last, device) - } - - // Virtual is the virtual inode we want to load. - virtual := last + 1 - - // Assert that we can load a virtual inode at a new place. - if !device.Load(MultiDeviceKey{Device: 0, Inode: 2}, virtual) { - t.Fatalf("got load of virtual inode %d failed, want success in %s", virtual, device) - } - - // Assert that the next inode skips over the loaded one. - if i := device.Map(MultiDeviceKey{Device: 0, Inode: 3}); i != virtual+1 { - t.Fatalf("got inode %d, want %d in %s", i, virtual+1, device) - } -} diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD deleted file mode 100644 index d7259b47b..000000000 --- a/pkg/sentry/fs/BUILD +++ /dev/null @@ -1,135 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "fs", - srcs = [ - "attr.go", - "context.go", - "copy_up.go", - "dentry.go", - "dirent.go", - "dirent_cache.go", - "dirent_cache_limiter.go", - "dirent_list.go", - "dirent_state.go", - "event_list.go", - "file.go", - "file_operations.go", - "file_overlay.go", - "file_state.go", - "filesystems.go", - "flags.go", - "fs.go", - "inode.go", - "inode_inotify.go", - "inode_operations.go", - "inode_overlay.go", - "inotify.go", - "inotify_event.go", - "inotify_watch.go", - "mock.go", - "mount.go", - "mount_overlay.go", - "mounts.go", - "offset.go", - "overlay.go", - "path.go", - "restore.go", - "save.go", - "seek.go", - "splice.go", - "sync.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/amutex", - "//pkg/log", - "//pkg/metric", - "//pkg/p9", - "//pkg/refs", - "//pkg/secio", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs/lock", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/memmap", - "//pkg/sentry/platform", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/uniqueid", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/state", - "//pkg/syserror", - "//pkg/waiter", - "//third_party/gvsync", - ], -) - -go_template_instance( - name = "dirent_list", - out = "dirent_list.go", - package = "fs", - prefix = "dirent", - template = "//pkg/ilist:generic_list", - types = { - "Linker": "*Dirent", - "Element": "*Dirent", - }, -) - -go_template_instance( - name = "event_list", - out = "event_list.go", - package = "fs", - prefix = "event", - template = "//pkg/ilist:generic_list", - types = { - "Linker": "*Event", - "Element": "*Event", - }, -) - -go_test( - name = "fs_x_test", - size = "small", - srcs = [ - "copy_up_test.go", - "file_overlay_test.go", - "inode_overlay_test.go", - "mounts_test.go", - ], - deps = [ - ":fs", - "//pkg/sentry/context", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel/contexttest", - "//pkg/sentry/usermem", - "//pkg/syserror", - ], -) - -go_test( - name = "fs_test", - size = "small", - srcs = [ - "dirent_cache_test.go", - "dirent_refs_test.go", - "mount_test.go", - "path_test.go", - ], - embed = [":fs"], - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - ], -) diff --git a/pkg/sentry/fs/README.md b/pkg/sentry/fs/README.md deleted file mode 100644 index db4a1b730..000000000 --- a/pkg/sentry/fs/README.md +++ /dev/null @@ -1,229 +0,0 @@ -This package provides an implementation of the Linux virtual filesystem. - -[TOC] - -## Overview - -- An `fs.Dirent` caches an `fs.Inode` in memory at a path in the VFS, giving - the `fs.Inode` a relative position with respect to other `fs.Inode`s. - -- If an `fs.Dirent` is referenced by two file descriptors, then those file - descriptors are coherent with each other: they depend on the same - `fs.Inode`. - -- A mount point is an `fs.Dirent` for which `fs.Dirent.mounted` is true. It - exposes the root of a mounted filesystem. - -- The `fs.Inode` produced by a registered filesystem on mount(2) owns an - `fs.MountedFilesystem` from which other `fs.Inode`s will be looked up. For a - remote filesystem, the `fs.MountedFilesystem` owns the connection to that - remote filesystem. - -- In general: - -``` -fs.Inode <------------------------------ -| | -| | -produced by | -exactly one | -| responsible for the -| virtual identity of -v | -fs.MountedFilesystem ------------------- -``` - -Glossary: - -- VFS: virtual filesystem. - -- inode: a virtual file object holding a cached view of a file on a backing - filesystem (includes metadata and page caches). - -- superblock: the virtual state of a mounted filesystem (e.g. the virtual - inode number set). - -- mount namespace: a view of the mounts under a root (during path traversal, - the VFS makes visible/follows the mount point that is in the current task's - mount namespace). - -## Save and restore - -An application's hard dependencies on filesystem state can be broken down into -two categories: - -- The state necessary to execute a traversal on or view the *virtual* - filesystem hierarchy, regardless of what files an application has open. - -- The state necessary to represent open files. - -The first is always necessary to save and restore. An application may never have -any open file descriptors, but across save and restore it should see a coherent -view of any mount namespace. NOTE(b/63601033): Currently only one "initial" -mount namespace is supported. - -The second is so that system calls across save and restore are coherent with -each other (e.g. so that unintended re-reads or overwrites do not occur). - -Specifically this state is: - -- An `fs.MountManager` containing mount points. - -- A `kernel.FDTable` containing pointers to open files. - -Anything else managed by the VFS that can be easily loaded into memory from a -filesystem is synced back to those filesystems and is not saved. Examples are -pages in page caches used for optimizations (i.e. readahead and writeback), and -directory entries used to accelerate path lookups. - -### Mount points - -Saving and restoring a mount point means saving and restoring: - -- The root of the mounted filesystem. - -- Mount flags, which control how the VFS interacts with the mounted - filesystem. - -- Any relevant metadata about the mounted filesystem. - -- All `fs.Inode`s referenced by the application that reside under the mount - point. - -`fs.MountedFilesystem` is metadata about a filesystem that is mounted. It is -referenced by every `fs.Inode` loaded into memory under the mount point -including the `fs.Inode` of the mount point itself. The `fs.MountedFilesystem` -maps file objects on the filesystem to a virtualized `fs.Inode` number and vice -versa. - -To restore all `fs.Inode`s under a given mount point, each `fs.Inode` leverages -its dependency on an `fs.MountedFilesystem`. Since the `fs.MountedFilesystem` -knows how an `fs.Inode` maps to a file object on a backing filesystem, this -mapping can be trivially consulted by each `fs.Inode` when the `fs.Inode` is -restored. - -In detail, a mount point is saved in two steps: - -- First, after the kernel is paused but before state.Save, we walk all mount - namespaces and install a mapping from `fs.Inode` numbers to file paths - relative to the root of the mounted filesystem in each - `fs.MountedFilesystem`. This is subsequently called the set of `fs.Inode` - mappings. - -- Second, during state.Save, each `fs.MountedFilesystem` decides whether to - save the set of `fs.Inode` mappings. In-memory filesystems, like tmpfs, have - no need to save a set of `fs.Inode` mappings, since the `fs.Inode`s can be - entirely encoded in state file. Each `fs.MountedFilesystem` also optionally - saves the device name from when the filesystem was originally mounted. Each - `fs.Inode` saves its virtual identifier and a reference to a - `fs.MountedFilesystem`. - -A mount point is restored in two steps: - -- First, before state.Load, all mount configurations are stored in a global - `fs.RestoreEnvironment`. This tells us what mount points the user wants to - restore and how to re-establish pointers to backing filesystems. - -- Second, during state.Load, each `fs.MountedFilesystem` optionally searches - for a mount in the `fs.RestoreEnvironment` that matches its saved device - name. The `fs.MountedFilesystem` then reestablishes a pointer to the root of - the mounted filesystem. For example, the mount specification provides the - network connection for a mounted remote filesystem client to communicate - with its remote file server. The `fs.MountedFilesystem` also trivially loads - its set of `fs.Inode` mappings. When an `fs.Inode` is encountered, the - `fs.Inode` loads its virtual identifier and its reference a - `fs.MountedFilesystem`. It uses the `fs.MountedFilesystem` to obtain the - root of the mounted filesystem and the `fs.Inode` mappings to obtain the - relative file path to its data. With these, the `fs.Inode` re-establishes a - pointer to its file object. - -A mount point can trivially restore its `fs.Inode`s in parallel since -`fs.Inode`s have a restore dependency on their `fs.MountedFilesystem` and not on -each other. - -### Open files - -An `fs.File` references the following filesystem objects: - -```go -fs.File -> fs.Dirent -> fs.Inode -> fs.MountedFilesystem -``` - -The `fs.Inode` is restored using its `fs.MountedFilesystem`. The -[Mount points](#mount-points) section above describes how this happens in -detail. The `fs.Dirent` restores its pointer to an `fs.Inode`, pointers to -parent and children `fs.Dirents`, and the basename of the file. - -Otherwise an `fs.File` restores flags, an offset, and a unique identifier (only -used internally). - -It may use the `fs.Inode`, which it indirectly holds a reference on through the -`fs.Dirent`, to reestablish an open file handle on the backing filesystem (e.g. -to continue reading and writing). - -## Overlay - -The overlay implementation in the fs package takes Linux overlayfs as a frame of -reference but corrects for several POSIX consistency errors. - -In Linux overlayfs, the `struct inode` used for reading and writing to the same -file may be different. This is because the `struct inode` is dissociated with -the process of copying up the file from the upper to the lower directory. Since -flock(2) and fcntl(2) locks, inotify(7) watches, page caches, and a file's -identity are all stored directly or indirectly off the `struct inode`, these -properties of the `struct inode` may be stale after the first modification. This -can lead to file locking bugs, missed inotify events, and inconsistent data in -shared memory mappings of files, to name a few problems. - -The fs package maintains a single `fs.Inode` to represent a directory entry in -an overlay and defines operations on this `fs.Inode` which synchronize with the -copy up process. This achieves several things: - -+ File locks, inotify watches, and the identity of the file need not be copied - at all. - -+ Memory mappings of files coordinate with the copy up process so that if a - file in the lower directory is memory mapped, all references to it are - invalidated, forcing the application to re-fault on memory mappings of the - file under the upper directory. - -The `fs.Inode` holds metadata about files in the upper and/or lower directories -via an `fs.overlayEntry`. The `fs.overlayEntry` implements the `fs.Mappable` -interface. It multiplexes between upper and lower directory memory mappings and -stores a copy of memory references so they can be transferred to the upper -directory `fs.Mappable` when the file is copied up. - -The lower filesystem in an overlay may contain another (nested) overlay, but the -upper filesystem may not contain another overlay. In other words, nested -overlays form a tree structure that only allows branching in the lower -filesystem. - -Caching decisions in the overlay are delegated to the upper filesystem, meaning -that the Keep and Revalidate methods on the overlay return the same values as -the upper filesystem. A small wrinkle is that the lower filesystem is not -allowed to return `true` from Revalidate, as the overlay can not reload inodes -from the lower filesystem. A lower filesystem that does return `true` from -Revalidate will trigger a panic. - -The `fs.Inode` also holds a reference to a `fs.MountedFilesystem` that -normalizes across the mounted filesystem state of the upper and lower -directories. - -When a file is copied from the lower to the upper directory, attempts to -interact with the file block until the copy completes. All copying synchronizes -with rename(2). - -## Future Work - -### Overlay - -When a file is copied from a lower directory to an upper directory, several -locks are taken: the global renamuMu and the copyMu of the `fs.Inode` being -copied. This blocks operations on the file, including fault handling of memory -mappings. Performance could be improved by copying files into a temporary -directory that resides on the same filesystem as the upper directory and doing -an atomic rename, holding locks only during the rename operation. - -Additionally files are copied up synchronously. For large files, this causes a -noticeable latency. Performance could be improved by pipelining copies at -non-overlapping file offsets. diff --git a/pkg/sentry/fs/anon/BUILD b/pkg/sentry/fs/anon/BUILD deleted file mode 100644 index ae1c9cf76..000000000 --- a/pkg/sentry/fs/anon/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "anon", - srcs = [ - "anon.go", - "device.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/anon", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/usermem", - ], -) diff --git a/pkg/sentry/fs/anon/anon_state_autogen.go b/pkg/sentry/fs/anon/anon_state_autogen.go new file mode 100755 index 000000000..fcb914212 --- /dev/null +++ b/pkg/sentry/fs/anon/anon_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package anon + diff --git a/pkg/sentry/fs/copy_up_test.go b/pkg/sentry/fs/copy_up_test.go deleted file mode 100644 index 1d80bf15a..000000000 --- a/pkg/sentry/fs/copy_up_test.go +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "bytes" - "crypto/rand" - "fmt" - "io" - "sync" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/fs" - _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -const ( - // origFileSize is the original file size. This many bytes should be - // copied up before the test file is modified. - origFileSize = 4096 - - // truncatedFileSize is the size to truncate all test files. - truncateFileSize = 10 -) - -// TestConcurrentCopyUp is a copy up stress test for an overlay. -// -// It creates a 64-level deep directory tree in the lower filesystem and -// populates the last subdirectory with 64 files containing random content: -// -// /lower -// /sudir0/.../subdir63/ -// /file0 -// ... -// /file63 -// -// The files are truncated concurrently by 4 goroutines per file. -// These goroutines contend with copying up all parent 64 subdirectories -// as well as the final file content. -// -// At the end of the test, we assert that the files respect the new truncated -// size and contain the content we expect. -func TestConcurrentCopyUp(t *testing.T) { - ctx := contexttest.Context(t) - files := makeOverlayTestFiles(t) - - var wg sync.WaitGroup - for _, file := range files { - for i := 0; i < 4; i++ { - wg.Add(1) - go func(o *overlayTestFile) { - if err := o.File.Dirent.Inode.Truncate(ctx, o.File.Dirent, truncateFileSize); err != nil { - t.Fatalf("failed to copy up: %v", err) - } - wg.Done() - }(file) - } - } - wg.Wait() - - for _, file := range files { - got := make([]byte, origFileSize) - n, err := file.File.Readv(ctx, usermem.BytesIOSequence(got)) - if int(n) != truncateFileSize { - t.Fatalf("read %d bytes from file, want %d", n, truncateFileSize) - } - if err != nil && err != io.EOF { - t.Fatalf("read got error %v, want nil", err) - } - if !bytes.Equal(got[:n], file.content[:truncateFileSize]) { - t.Fatalf("file content is %v, want %v", got[:n], file.content[:truncateFileSize]) - } - } -} - -type overlayTestFile struct { - File *fs.File - name string - content []byte -} - -func makeOverlayTestFiles(t *testing.T) []*overlayTestFile { - ctx := contexttest.Context(t) - - // Create a lower tmpfs mount. - fsys, _ := fs.FindFilesystem("tmpfs") - lower, err := fsys.Mount(contexttest.Context(t), "", fs.MountSourceFlags{}, "", nil) - if err != nil { - t.Fatalf("failed to mount tmpfs: %v", err) - } - lowerRoot := fs.NewDirent(ctx, lower, "") - - // Make a deep set of subdirectories that everyone shares. - next := lowerRoot - for i := 0; i < 64; i++ { - name := fmt.Sprintf("subdir%d", i) - err := next.CreateDirectory(ctx, lowerRoot, name, fs.FilePermsFromMode(0777)) - if err != nil { - t.Fatalf("failed to create dir %q: %v", name, err) - } - next, err = next.Walk(ctx, lowerRoot, name) - if err != nil { - t.Fatalf("failed to walk to %q: %v", name, err) - } - } - - // Make a bunch of files in the last directory. - var files []*overlayTestFile - for i := 0; i < 64; i++ { - name := fmt.Sprintf("file%d", i) - f, err := next.Create(ctx, next, name, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - t.Fatalf("failed to create file %q: %v", name, err) - } - defer f.DecRef() - - relname, _ := f.Dirent.FullName(lowerRoot) - - o := &overlayTestFile{ - name: relname, - content: make([]byte, origFileSize), - } - - if _, err := rand.Read(o.content); err != nil { - t.Fatalf("failed to read from /dev/urandom: %v", err) - } - - if _, err := f.Writev(ctx, usermem.BytesIOSequence(o.content)); err != nil { - t.Fatalf("failed to write content to file %q: %v", name, err) - } - - files = append(files, o) - } - - // Create an empty upper tmpfs mount which we will copy up into. - upper, err := fsys.Mount(ctx, "", fs.MountSourceFlags{}, "", nil) - if err != nil { - t.Fatalf("failed to mount tmpfs: %v", err) - } - - // Construct an overlay root. - overlay, err := fs.NewOverlayRoot(ctx, upper, lower, fs.MountSourceFlags{}) - if err != nil { - t.Fatalf("failed to construct overlay root: %v", err) - } - - // Create a MountNamespace to traverse the file system. - mns, err := fs.NewMountNamespace(ctx, overlay) - if err != nil { - t.Fatalf("failed to construct mount manager: %v", err) - } - - // Walk to all of the files in the overlay, open them readable. - for _, f := range files { - maxTraversals := uint(0) - d, err := mns.FindInode(ctx, mns.Root(), mns.Root(), f.name, &maxTraversals) - if err != nil { - t.Fatalf("failed to find %q: %v", f.name, err) - } - defer d.DecRef() - - f.File, err = d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("failed to open file %q readable: %v", f.name, err) - } - } - - return files -} diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD deleted file mode 100644 index 80e106e6f..000000000 --- a/pkg/sentry/fs/dev/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "dev", - srcs = [ - "dev.go", - "device.go", - "fs.go", - "full.go", - "null.go", - "random.go", - "tty.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/dev", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/rand", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/pgalloc", - "//pkg/sentry/safemem", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/dev/dev_state_autogen.go b/pkg/sentry/fs/dev/dev_state_autogen.go new file mode 100755 index 000000000..a997f3ecf --- /dev/null +++ b/pkg/sentry/fs/dev/dev_state_autogen.go @@ -0,0 +1,130 @@ +// automatically generated by stateify. + +package dev + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *filesystem) beforeSave() {} +func (x *filesystem) save(m state.Map) { + x.beforeSave() +} + +func (x *filesystem) afterLoad() {} +func (x *filesystem) load(m state.Map) { +} + +func (x *fullDevice) beforeSave() {} +func (x *fullDevice) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *fullDevice) afterLoad() {} +func (x *fullDevice) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *fullFileOperations) beforeSave() {} +func (x *fullFileOperations) save(m state.Map) { + x.beforeSave() +} + +func (x *fullFileOperations) afterLoad() {} +func (x *fullFileOperations) load(m state.Map) { +} + +func (x *nullDevice) beforeSave() {} +func (x *nullDevice) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *nullDevice) afterLoad() {} +func (x *nullDevice) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *nullFileOperations) beforeSave() {} +func (x *nullFileOperations) save(m state.Map) { + x.beforeSave() +} + +func (x *nullFileOperations) afterLoad() {} +func (x *nullFileOperations) load(m state.Map) { +} + +func (x *zeroDevice) beforeSave() {} +func (x *zeroDevice) save(m state.Map) { + x.beforeSave() + m.Save("nullDevice", &x.nullDevice) +} + +func (x *zeroDevice) afterLoad() {} +func (x *zeroDevice) load(m state.Map) { + m.Load("nullDevice", &x.nullDevice) +} + +func (x *zeroFileOperations) beforeSave() {} +func (x *zeroFileOperations) save(m state.Map) { + x.beforeSave() +} + +func (x *zeroFileOperations) afterLoad() {} +func (x *zeroFileOperations) load(m state.Map) { +} + +func (x *randomDevice) beforeSave() {} +func (x *randomDevice) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *randomDevice) afterLoad() {} +func (x *randomDevice) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *randomFileOperations) beforeSave() {} +func (x *randomFileOperations) save(m state.Map) { + x.beforeSave() +} + +func (x *randomFileOperations) afterLoad() {} +func (x *randomFileOperations) load(m state.Map) { +} + +func (x *ttyInodeOperations) beforeSave() {} +func (x *ttyInodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *ttyInodeOperations) afterLoad() {} +func (x *ttyInodeOperations) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *ttyFileOperations) beforeSave() {} +func (x *ttyFileOperations) save(m state.Map) { + x.beforeSave() +} + +func (x *ttyFileOperations) afterLoad() {} +func (x *ttyFileOperations) load(m state.Map) { +} + +func init() { + state.Register("dev.filesystem", (*filesystem)(nil), state.Fns{Save: (*filesystem).save, Load: (*filesystem).load}) + state.Register("dev.fullDevice", (*fullDevice)(nil), state.Fns{Save: (*fullDevice).save, Load: (*fullDevice).load}) + state.Register("dev.fullFileOperations", (*fullFileOperations)(nil), state.Fns{Save: (*fullFileOperations).save, Load: (*fullFileOperations).load}) + state.Register("dev.nullDevice", (*nullDevice)(nil), state.Fns{Save: (*nullDevice).save, Load: (*nullDevice).load}) + state.Register("dev.nullFileOperations", (*nullFileOperations)(nil), state.Fns{Save: (*nullFileOperations).save, Load: (*nullFileOperations).load}) + state.Register("dev.zeroDevice", (*zeroDevice)(nil), state.Fns{Save: (*zeroDevice).save, Load: (*zeroDevice).load}) + state.Register("dev.zeroFileOperations", (*zeroFileOperations)(nil), state.Fns{Save: (*zeroFileOperations).save, Load: (*zeroFileOperations).load}) + state.Register("dev.randomDevice", (*randomDevice)(nil), state.Fns{Save: (*randomDevice).save, Load: (*randomDevice).load}) + state.Register("dev.randomFileOperations", (*randomFileOperations)(nil), state.Fns{Save: (*randomFileOperations).save, Load: (*randomFileOperations).load}) + state.Register("dev.ttyInodeOperations", (*ttyInodeOperations)(nil), state.Fns{Save: (*ttyInodeOperations).save, Load: (*ttyInodeOperations).load}) + state.Register("dev.ttyFileOperations", (*ttyFileOperations)(nil), state.Fns{Save: (*ttyFileOperations).save, Load: (*ttyFileOperations).load}) +} diff --git a/pkg/sentry/fs/dirent_cache_test.go b/pkg/sentry/fs/dirent_cache_test.go deleted file mode 100644 index 395c879f5..000000000 --- a/pkg/sentry/fs/dirent_cache_test.go +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "testing" -) - -func TestDirentCache(t *testing.T) { - const maxSize = 5 - - c := NewDirentCache(maxSize) - - // Size starts at 0. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Create a Dirent d. - d := NewNegativeDirent("") - - // c does not contain d. - if got, want := c.contains(d), false; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add d to the cache. - c.Add(d) - - // Size is now 1. - if got, want := c.Size(), uint64(1); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add maxSize-1 more elements. d should be oldest element. - for i := 0; i < maxSize-1; i++ { - c.Add(NewNegativeDirent("")) - } - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // "Bump" d to the front by re-adding it. - c.Add(d) - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add maxSize-1 more elements. d should again be oldest element. - for i := 0; i < maxSize-1; i++ { - c.Add(NewNegativeDirent("")) - } - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add one more element, which will bump d from the cache. - c.Add(NewNegativeDirent("")) - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c does not contain d. - if got, want := c.contains(d), false; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Invalidating causes size to be 0 and list to be empty. - c.Invalidate() - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - if got, want := c.list.Empty(), true; got != want { - t.Errorf("c.list.Empty() got %v, want %v", got, want) - } - - // Fill cache with maxSize dirents. - for i := 0; i < maxSize; i++ { - c.Add(NewNegativeDirent("")) - } -} - -func TestDirentCacheLimiter(t *testing.T) { - const ( - globalMaxSize = 5 - maxSize = 3 - ) - - limit := NewDirentCacheLimiter(globalMaxSize) - c1 := NewDirentCache(maxSize) - c1.limit = limit - c2 := NewDirentCache(maxSize) - c2.limit = limit - - // Create a Dirent d. - d := NewNegativeDirent("") - - // Add d to the cache. - c1.Add(d) - if got, want := c1.Size(), uint64(1); got != want { - t.Errorf("c1.Size() got %v, want %v", got, want) - } - - // Add maxSize-1 more elements. d should be oldest element. - for i := 0; i < maxSize-1; i++ { - c1.Add(NewNegativeDirent("")) - } - if got, want := c1.Size(), uint64(maxSize); got != want { - t.Errorf("c1.Size() got %v, want %v", got, want) - } - - // Check that d is still there. - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - - // Fill up the other cache, it will start dropping old entries from the cache - // when the global limit is reached. - for i := 0; i < maxSize; i++ { - c2.Add(NewNegativeDirent("")) - } - - // Check is what's remaining from global max. - if got, want := c2.Size(), globalMaxSize-maxSize; int(got) != want { - t.Errorf("c2.Size() got %v, want %v", got, want) - } - - // Check that d was not dropped. - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - - // Add an entry that will eventually be dropped. Check is done later... - drop := NewNegativeDirent("") - c1.Add(drop) - - // Check that d is bumped to front even when global limit is reached. - c1.Add(d) - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - - // Add 2 more element and check that: - // - d is still in the list: to verify that d was bumped - // - d2/d3 are in the list: older entries are dropped when global limit is - // reached. - // - drop is not in the list: indeed older elements are dropped. - d2 := NewNegativeDirent("") - c1.Add(d2) - d3 := NewNegativeDirent("") - c1.Add(d3) - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - if got, want := c1.contains(d2), true; got != want { - t.Errorf("c1.contains(d2) got %v want %v", got, want) - } - if got, want := c1.contains(d3), true; got != want { - t.Errorf("c1.contains(d3) got %v want %v", got, want) - } - if got, want := c1.contains(drop), false; got != want { - t.Errorf("c1.contains(drop) got %v want %v", got, want) - } - - // Drop all entries from one cache. The other will be allowed to grow. - c1.Invalidate() - c2.Add(NewNegativeDirent("")) - if got, want := c2.Size(), uint64(maxSize); got != want { - t.Errorf("c2.Size() got %v, want %v", got, want) - } -} - -// TestNilDirentCache tests that a nil cache supports all cache operations, but -// treats them as noop. -func TestNilDirentCache(t *testing.T) { - // Create a nil cache. - var c *DirentCache - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Call Add. - c.Add(NewNegativeDirent("")) - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Call Remove. - c.Remove(NewNegativeDirent("")) - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Call Invalidate. - c.Invalidate() - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } -} diff --git a/pkg/sentry/fs/dirent_list.go b/pkg/sentry/fs/dirent_list.go new file mode 100755 index 000000000..750961a48 --- /dev/null +++ b/pkg/sentry/fs/dirent_list.go @@ -0,0 +1,173 @@ +package fs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type direntElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (direntElementMapper) linkerFor(elem *Dirent) *Dirent { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type direntList struct { + head *Dirent + tail *Dirent +} + +// Reset resets list l to the empty state. +func (l *direntList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *direntList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *direntList) Front() *Dirent { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *direntList) Back() *Dirent { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *direntList) PushFront(e *Dirent) { + direntElementMapper{}.linkerFor(e).SetNext(l.head) + direntElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + direntElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *direntList) PushBack(e *Dirent) { + direntElementMapper{}.linkerFor(e).SetNext(nil) + direntElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + direntElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *direntList) PushBackList(m *direntList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + direntElementMapper{}.linkerFor(l.tail).SetNext(m.head) + direntElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *direntList) InsertAfter(b, e *Dirent) { + a := direntElementMapper{}.linkerFor(b).Next() + direntElementMapper{}.linkerFor(e).SetNext(a) + direntElementMapper{}.linkerFor(e).SetPrev(b) + direntElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + direntElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *direntList) InsertBefore(a, e *Dirent) { + b := direntElementMapper{}.linkerFor(a).Prev() + direntElementMapper{}.linkerFor(e).SetNext(a) + direntElementMapper{}.linkerFor(e).SetPrev(b) + direntElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + direntElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *direntList) Remove(e *Dirent) { + prev := direntElementMapper{}.linkerFor(e).Prev() + next := direntElementMapper{}.linkerFor(e).Next() + + if prev != nil { + direntElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + direntElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type direntEntry struct { + next *Dirent + prev *Dirent +} + +// Next returns the entry that follows e in the list. +func (e *direntEntry) Next() *Dirent { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *direntEntry) Prev() *Dirent { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *direntEntry) SetNext(elem *Dirent) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *direntEntry) SetPrev(elem *Dirent) { + e.prev = elem +} diff --git a/pkg/sentry/fs/dirent_refs_test.go b/pkg/sentry/fs/dirent_refs_test.go deleted file mode 100644 index 884e3ff06..000000000 --- a/pkg/sentry/fs/dirent_refs_test.go +++ /dev/null @@ -1,418 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" -) - -func newMockDirInode(ctx context.Context, cache *DirentCache) *Inode { - return NewMockInode(ctx, NewMockMountSource(cache), StableAttr{Type: Directory}) -} - -func TestWalkPositive(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - root := NewDirent(ctx, newMockDirInode(ctx, nil), "root") - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - name := "d" - d, err := root.walk(ctx, root, name, false) - if err != nil { - t.Fatalf("root.walk(root, %q) got %v, want nil", name, err) - } - - if got := root.ReadRefs(); got != 2 { - t.Fatalf("root has a ref count of %d, want %d", got, 2) - } - - if got := d.ReadRefs(); got != 1 { - t.Fatalf("child name = %q has a ref count of %d, want %d", d.name, got, 1) - } - - d.DecRef() - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - if got := d.ReadRefs(); got != 0 { - t.Fatalf("child name = %q has a ref count of %d, want %d", d.name, got, 0) - } - - root.flush() - - if got := len(root.children); got != 0 { - t.Fatalf("root has %d children, want %d", got, 0) - } -} - -func TestWalkNegative(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - root := NewDirent(ctx, NewEmptyDir(ctx, nil), "root") - mn := root.Inode.InodeOperations.(*mockInodeOperationsLookupNegative) - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - name := "d" - for i := 0; i < 100; i++ { - _, err := root.walk(ctx, root, name, false) - if err != syscall.ENOENT { - t.Fatalf("root.walk(root, %q) got %v, want %v", name, err, syscall.ENOENT) - } - } - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - if got := len(root.children); got != 1 { - t.Fatalf("root has %d children, want %d", got, 1) - } - - w, ok := root.children[name] - if !ok { - t.Fatalf("root wants child at %q", name) - } - - child := w.Get() - if child == nil { - t.Fatalf("root wants to resolve weak reference") - } - - if !child.(*Dirent).IsNegative() { - t.Fatalf("root found positive child at %q, want negative", name) - } - - if got := child.(*Dirent).ReadRefs(); got != 2 { - t.Fatalf("child has a ref count of %d, want %d", got, 2) - } - - child.DecRef() - - if got := child.(*Dirent).ReadRefs(); got != 1 { - t.Fatalf("child has a ref count of %d, want %d", got, 1) - } - - if got := len(root.children); got != 1 { - t.Fatalf("root has %d children, want %d", got, 1) - } - - root.DecRef() - - if got := root.ReadRefs(); got != 0 { - t.Fatalf("root has a ref count of %d, want %d", got, 0) - } - - AsyncBarrier() - - if got := mn.releaseCalled; got != true { - t.Fatalf("root.Close was called %v, want true", got) - } -} - -type mockInodeOperationsLookupNegative struct { - *MockInodeOperations - releaseCalled bool -} - -func NewEmptyDir(ctx context.Context, cache *DirentCache) *Inode { - m := NewMockMountSource(cache) - return NewInode(ctx, &mockInodeOperationsLookupNegative{ - MockInodeOperations: NewMockInodeOperations(ctx), - }, m, StableAttr{Type: Directory}) -} - -func (m *mockInodeOperationsLookupNegative) Lookup(ctx context.Context, dir *Inode, p string) (*Dirent, error) { - return NewNegativeDirent(p), nil -} - -func (m *mockInodeOperationsLookupNegative) Release(context.Context) { - m.releaseCalled = true -} - -func TestHashNegativeToPositive(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - root := NewDirent(ctx, NewEmptyDir(ctx, nil), "root") - - name := "d" - _, err := root.walk(ctx, root, name, false) - if err != syscall.ENOENT { - t.Fatalf("root.walk(root, %q) got %v, want %v", name, err, syscall.ENOENT) - } - - if got := root.exists(ctx, root, name); got != false { - t.Fatalf("got %q exists, want does not exist", name) - } - - f, err := root.Create(ctx, root, name, FileFlags{}, FilePermissions{}) - if err != nil { - t.Fatalf("root.Create(%q, _), got error %v, want nil", name, err) - } - d := f.Dirent - - if d.IsNegative() { - t.Fatalf("got negative Dirent, want positive") - } - - if got := d.ReadRefs(); got != 1 { - t.Fatalf("child %q has a ref count of %d, want %d", name, got, 1) - } - - if got := root.ReadRefs(); got != 2 { - t.Fatalf("root has a ref count of %d, want %d", got, 2) - } - - if got := len(root.children); got != 1 { - t.Fatalf("got %d children, want %d", got, 1) - } - - w, ok := root.children[name] - if !ok { - t.Fatalf("failed to find weak reference to %q", name) - } - - child := w.Get() - if child == nil { - t.Fatalf("want to resolve weak reference") - } - - if child.(*Dirent) != d { - t.Fatalf("got foreign child") - } -} - -func TestRevalidate(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - for _, test := range []struct { - // desc is the test's description. - desc string - - // Whether to make negative Dirents. - makeNegative bool - }{ - { - desc: "Revalidate negative Dirent", - makeNegative: true, - }, - { - desc: "Revalidate positive Dirent", - makeNegative: false, - }, - } { - t.Run(test.desc, func(t *testing.T) { - ctx := contexttest.Context(t) - root := NewDirent(ctx, NewMockInodeRevalidate(ctx, test.makeNegative), "root") - - name := "d" - d1, err := root.walk(ctx, root, name, false) - if !test.makeNegative && err != nil { - t.Fatalf("root.walk(root, %q) got %v, want nil", name, err) - } - d2, err := root.walk(ctx, root, name, false) - if !test.makeNegative && err != nil { - t.Fatalf("root.walk(root, %q) got %v, want nil", name, err) - } - if !test.makeNegative && d1 == d2 { - t.Fatalf("revalidating walk got same *Dirent, want different") - } - if got := len(root.children); got != 1 { - t.Errorf("revalidating walk got %d children, want %d", got, 1) - } - }) - } -} - -type MockInodeOperationsRevalidate struct { - *MockInodeOperations - makeNegative bool -} - -func NewMockInodeRevalidate(ctx context.Context, makeNegative bool) *Inode { - mn := NewMockInodeOperations(ctx) - m := NewMockMountSource(nil) - m.MountSourceOperations.(*MockMountSourceOps).revalidate = true - return NewInode(ctx, &MockInodeOperationsRevalidate{MockInodeOperations: mn, makeNegative: makeNegative}, m, StableAttr{Type: Directory}) -} - -func (m *MockInodeOperationsRevalidate) Lookup(ctx context.Context, dir *Inode, p string) (*Dirent, error) { - if !m.makeNegative { - return m.MockInodeOperations.Lookup(ctx, dir, p) - } - return NewNegativeDirent(p), nil -} - -func TestCreateExtraRefs(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - for _, test := range []struct { - // desc is the test's description. - desc string - - // root is the Dirent to create from. - root *Dirent - - // expected references on walked Dirent. - refs int64 - }{ - { - desc: "Create caching", - root: NewDirent(ctx, NewEmptyDir(ctx, NewDirentCache(1)), "root"), - refs: 2, - }, - { - desc: "Create not caching", - root: NewDirent(ctx, NewEmptyDir(ctx, nil), "root"), - refs: 1, - }, - } { - t.Run(test.desc, func(t *testing.T) { - name := "d" - f, err := test.root.Create(ctx, test.root, name, FileFlags{}, FilePermissions{}) - if err != nil { - t.Fatalf("root.Create(root, %q) failed: %v", name, err) - } - d := f.Dirent - - if got := d.ReadRefs(); got != test.refs { - t.Errorf("dirent has a ref count of %d, want %d", got, test.refs) - } - }) - } -} - -func TestRemoveExtraRefs(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - for _, test := range []struct { - // desc is the test's description. - desc string - - // root is the Dirent to make and remove from. - root *Dirent - }{ - { - desc: "Remove caching", - root: NewDirent(ctx, NewEmptyDir(ctx, NewDirentCache(1)), "root"), - }, - { - desc: "Remove not caching", - root: NewDirent(ctx, NewEmptyDir(ctx, nil), "root"), - }, - } { - t.Run(test.desc, func(t *testing.T) { - name := "d" - f, err := test.root.Create(ctx, test.root, name, FileFlags{}, FilePermissions{}) - if err != nil { - t.Fatalf("root.Create(%q, _) failed: %v", name, err) - } - d := f.Dirent - - if err := test.root.Remove(contexttest.Context(t), test.root, name); err != nil { - t.Fatalf("root.Remove(root, %q) failed: %v", name, err) - } - - if got := d.ReadRefs(); got != 1 { - t.Fatalf("dirent has a ref count of %d, want %d", got, 1) - } - - d.DecRef() - - test.root.flush() - - if got := len(test.root.children); got != 0 { - t.Errorf("root has %d children, want %d", got, 0) - } - }) - } -} - -func TestRenameExtraRefs(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - for _, test := range []struct { - // desc is the test's description. - desc string - - // cache of extra Dirent references, may be nil. - cache *DirentCache - }{ - { - desc: "Rename no caching", - cache: nil, - }, - { - desc: "Rename caching", - cache: NewDirentCache(5), - }, - } { - t.Run(test.desc, func(t *testing.T) { - ctx := contexttest.Context(t) - - dirAttr := StableAttr{Type: Directory} - - oldParent := NewDirent(ctx, NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "old_parent") - newParent := NewDirent(ctx, NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "new_parent") - - renamed, err := oldParent.Walk(ctx, oldParent, "old_child") - if err != nil { - t.Fatalf("Walk(oldParent, %q) got error %v, want nil", "old_child", err) - } - replaced, err := newParent.Walk(ctx, oldParent, "new_child") - if err != nil { - t.Fatalf("Walk(newParent, %q) got error %v, want nil", "new_child", err) - } - - if err := Rename(contexttest.RootContext(t), oldParent /*root */, oldParent, "old_child", newParent, "new_child"); err != nil { - t.Fatalf("Rename got error %v, want nil", err) - } - - oldParent.flush() - newParent.flush() - - // Expect to have only active references. - if got := renamed.ReadRefs(); got != 1 { - t.Errorf("renamed has ref count %d, want only active references %d", got, 1) - } - if got := replaced.ReadRefs(); got != 1 { - t.Errorf("replaced has ref count %d, want only active references %d", got, 1) - } - }) - } -} diff --git a/pkg/sentry/fs/event_list.go b/pkg/sentry/fs/event_list.go new file mode 100755 index 000000000..c94cb03e1 --- /dev/null +++ b/pkg/sentry/fs/event_list.go @@ -0,0 +1,173 @@ +package fs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type eventElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (eventElementMapper) linkerFor(elem *Event) *Event { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type eventList struct { + head *Event + tail *Event +} + +// Reset resets list l to the empty state. +func (l *eventList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *eventList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *eventList) Front() *Event { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *eventList) Back() *Event { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *eventList) PushFront(e *Event) { + eventElementMapper{}.linkerFor(e).SetNext(l.head) + eventElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + eventElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *eventList) PushBack(e *Event) { + eventElementMapper{}.linkerFor(e).SetNext(nil) + eventElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + eventElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *eventList) PushBackList(m *eventList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + eventElementMapper{}.linkerFor(l.tail).SetNext(m.head) + eventElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *eventList) InsertAfter(b, e *Event) { + a := eventElementMapper{}.linkerFor(b).Next() + eventElementMapper{}.linkerFor(e).SetNext(a) + eventElementMapper{}.linkerFor(e).SetPrev(b) + eventElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + eventElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *eventList) InsertBefore(a, e *Event) { + b := eventElementMapper{}.linkerFor(a).Prev() + eventElementMapper{}.linkerFor(e).SetNext(a) + eventElementMapper{}.linkerFor(e).SetPrev(b) + eventElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + eventElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *eventList) Remove(e *Event) { + prev := eventElementMapper{}.linkerFor(e).Prev() + next := eventElementMapper{}.linkerFor(e).Next() + + if prev != nil { + eventElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + eventElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type eventEntry struct { + next *Event + prev *Event +} + +// Next returns the entry that follows e in the list. +func (e *eventEntry) Next() *Event { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *eventEntry) Prev() *Event { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *eventEntry) SetNext(elem *Event) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *eventEntry) SetPrev(elem *Event) { + e.prev = elem +} diff --git a/pkg/sentry/fs/ext/BUILD b/pkg/sentry/fs/ext/BUILD deleted file mode 100644 index a3b1e4bd6..000000000 --- a/pkg/sentry/fs/ext/BUILD +++ /dev/null @@ -1,86 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -go_template_instance( - name = "dirent_list", - out = "dirent_list.go", - package = "ext", - prefix = "dirent", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*dirent", - "Linker": "*dirent", - }, -) - -go_library( - name = "ext", - srcs = [ - "block_map_file.go", - "dentry.go", - "directory.go", - "dirent_list.go", - "ext.go", - "extent_file.go", - "file_description.go", - "filesystem.go", - "inode.go", - "regular_file.go", - "symlink.go", - "utils.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/fd", - "//pkg/log", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/ext/disklayout", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/safemem", - "//pkg/sentry/syscalls/linux", - "//pkg/sentry/usermem", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "ext_test", - size = "small", - srcs = [ - "block_map_test.go", - "ext_test.go", - "extent_test.go", - ], - data = [ - "//pkg/sentry/fs/ext:assets/bigfile.txt", - "//pkg/sentry/fs/ext:assets/file.txt", - "//pkg/sentry/fs/ext:assets/tiny.ext2", - "//pkg/sentry/fs/ext:assets/tiny.ext3", - "//pkg/sentry/fs/ext:assets/tiny.ext4", - ], - embed = [":ext"], - deps = [ - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs/ext/disklayout", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/usermem", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//runsc/test/testutil", - "@com_github_google_go-cmp//cmp:go_default_library", - "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", - ], -) diff --git a/pkg/sentry/fs/ext/README.md b/pkg/sentry/fs/ext/README.md deleted file mode 100644 index e212717aa..000000000 --- a/pkg/sentry/fs/ext/README.md +++ /dev/null @@ -1,117 +0,0 @@ -## EXT(2/3/4) File System - -This is a filesystem driver which supports ext2, ext3 and ext4 filesystems. -Linux has specialized drivers for each variant but none which supports all. This -library takes advantage of ext's backward compatibility and understands the -internal organization of on-disk structures to support all variants. - -This driver implementation diverges from the Linux implementations in being more -forgiving about versioning. For instance, if a filesystem contains both extent -based inodes and classical block map based inodes, this driver will not complain -and interpret them both correctly. While in Linux this would be an issue. This -blurs the line between the three ext fs variants. - -Ext2 is considered deprecated as of Red Hat Enterprise Linux 7, and ext3 has -been superseded by ext4 by large performance gains. Thus it is recommended to -upgrade older filesystem images to ext4 using e2fsprogs for better performance. - -### Read Only - -This driver currently only allows read only operations. A lot of the design -decisions are based on this feature. There are plans to implement write (the -process for which is documented in the future work section). - -### Performance - -One of the biggest wins about this driver is that it directly talks to the -underlying block device (or whatever persistent storage is being used), instead -of making expensive RPCs to a gofer. - -Another advantage is that ext fs supports fast concurrent reads. Currently the -device is represented using a `io.ReaderAt` which allows for concurrent reads. -All reads are directly passed to the device driver which intelligently serves -the read requests in the optimal order. There is no congestion due to locking -while reading in the filesystem level. - -Reads are optimized further in the way file data is transferred over to user -memory. Ext fs directly copies over file data from disk into user memory with no -additional allocations on the way. We can only get faster by preloading file -data into memory (see future work section). - -The internal structures used to represent files, inodes and file descriptors use -a lot of inheritance. With the level of indirection that an interface adds with -an internal pointer, it can quickly fragment a structure across memory. As this -runs along side a full blown kernel (which is memory intensive), having a -fragmented struct might hurt performance. Hence these internal structures, -though interfaced, are tightly packed in memory using the same inheritance -pattern that pkg/sentry/vfs uses. The pkg/sentry/fs/ext/disklayout package makes -an execption to this pattern for reasons documented in the package. - -### Security - -This driver also intends to help sandbox the container better by reducing the -surface of the host kernel that the application touches. It prevents the -application from exploiting vulnerabilities in the host filesystem driver. All -`io.ReaderAt.ReadAt()` calls are translated to `pread(2)` which are directly -passed to the device driver in the kernel. Hence this reduces the surface for -attack. - -The application can not affect any host filesystems other than the one passed -via block device by the user. - -### Future Work - -#### Write - -To support write operations we would need to modify the block device underneath. -Currently, the driver does not modify the device at all, not even for updating -the access times for reads. Modifying the filesystem incorrectly can corrupt it -and render it unreadable for other correct ext(x) drivers. Hence caution must be -maintained while modifying metadata structures. - -Ext4 specifically is built for performance and has added a lot of complexity as -to how metadata structures are modified. For instance, files that are organized -via an extent tree which must be balanced and file data blocks must be placed in -the same extent as much as possible to increase locality. Such properties must -be maintained while modifying the tree. - -Ext filesystems boast a lot about locality, which plays a big role in them being -performant. The block allocation algorithm in Linux does a good job in keeping -related data together. This behavior must be maintained as much as possible, -else we might end up degrading the filesystem performance over time. - -Ext4 also supports a wide variety of features which are specialized for varying -use cases. Implementing all of them can get difficult very quickly. - -Ext(x) checksums all its metadata structures to check for corruption, so -modification of any metadata struct must correspond with re-checksumming the -struct. Linux filesystem drivers also order on-disk updates intelligently to not -corrupt the filesystem and also remain performant. The in-memory metadata -structures must be kept in sync with what is on disk. - -There is also replication of some important structures across the filesystem. -All replicas must be updated when their original copy is updated. There is also -provisioning for snapshotting which must be kept in mind, although it should not -affect this implementation unless we allow users to create filesystem snapshots. - -Ext4 also introduced journaling (jbd2). The journal must be updated -appropriately. - -#### Performance - -To improve performance we should implement a buffer cache, and optionally, read -ahead for small files. While doing so we must also keep in mind the memory usage -and have a reasonable cap on how much file data we want to hold in memory. - -#### Features - -Our current implementation will work with most ext4 filesystems for readonly -purposed. However, the following features are not supported yet: - -- Journal -- Snapshotting -- Extended Attributes -- Hash Tree Directories -- Meta Block Groups -- Multiple Mount Protection -- Bigalloc diff --git a/pkg/sentry/fs/ext/assets/README.md b/pkg/sentry/fs/ext/assets/README.md deleted file mode 100644 index 6f1e81b3a..000000000 --- a/pkg/sentry/fs/ext/assets/README.md +++ /dev/null @@ -1,36 +0,0 @@ -### Tiny Ext(2/3/4) Images - -The images are of size 64Kb which supports 64 1k blocks and 16 inodes. This is -the smallest size mkfs.ext(2/3/4) works with. - -These images were generated using the following commands. - -```bash -fallocate -l 64K tiny.ext$VERSION -mkfs.ext$VERSION -j tiny.ext$VERSION -``` - -where `VERSION` is `2`, `3` or `4`. - -You can mount it using: - -```bash -sudo mount -o loop tiny.ext$VERSION $MOUNTPOINT -``` - -`file.txt`, `bigfile.txt` and `symlink.txt` were added to this image by just -mounting it and copying (while preserving links) those files to the mountpoint -directory using: - -```bash -sudo cp -P {file.txt,symlink.txt,bigfile.txt} $MOUNTPOINT -``` - -The files in this directory mirror the contents and organisation of the files -stored in the image. - -You can umount the filesystem using: - -```bash -sudo umount $MOUNTPOINT -``` diff --git a/pkg/sentry/fs/ext/assets/bigfile.txt b/pkg/sentry/fs/ext/assets/bigfile.txt deleted file mode 100644 index 3857cf516..000000000 --- a/pkg/sentry/fs/ext/assets/bigfile.txt +++ /dev/null @@ -1,41 +0,0 @@ -Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus faucibus eleifend orci, ut ornare nibh faucibus eu. Cras at condimentum massa. Nullam luctus, elit non porttitor congue, sapien diam feugiat sapien, sed eleifend nulla mauris non arcu. Sed lacinia mauris magna, eu mollis libero varius sit amet. Donec mollis, quam convallis commodo posuere, dolor nisi placerat nisi, in faucibus augue mi eu lorem. In pharetra consectetur faucibus. Ut euismod ex efficitur egestas tincidunt. Maecenas condimentum ut ante in rutrum. Vivamus sed arcu tempor, faucibus turpis et, lacinia diam. - -Sed in lacus vel nisl interdum bibendum in sed justo. Nunc tellus risus, molestie vitae arcu sed, molestie tempus ligula. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc risus neque, volutpat et ante non, ullamcorper condimentum ante. Aliquam sed metus in urna condimentum convallis. Vivamus ut libero mauris. Proin mollis posuere consequat. Vestibulum placerat mollis est et pulvinar. - -Donec rutrum odio ac diam pharetra, id fermentum magna cursus. Pellentesque in dapibus elit, et condimentum orci. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Suspendisse euismod dapibus est, id vestibulum mauris. Nulla facilisi. Nulla cursus gravida nisi. Phasellus vestibulum rutrum lectus, a dignissim mauris hendrerit vitae. In at elementum mauris. Integer vel efficitur velit. Nullam fringilla sapien mi, quis luctus neque efficitur ac. Aenean nec quam dapibus nunc commodo pharetra. Proin sapien mi, fermentum aliquet vulputate non, aliquet porttitor diam. Quisque lacinia, urna et finibus fermentum, nunc lacus vehicula ex, sed congue metus lectus ac quam. Aliquam erat volutpat. Suspendisse sodales, dolor ut tincidunt finibus, augue erat varius tellus, a interdum erat sem at nunc. Vestibulum cursus iaculis sapien, vitae feugiat dui auctor quis. - -Pellentesque nec maximus nulla, eu blandit diam. Maecenas quis arcu ornare, congue ante at, vehicula ipsum. Praesent feugiat mauris rutrum sem fermentum, nec luctus ipsum placerat. Pellentesque placerat ipsum at dignissim fringilla. Vivamus et posuere sem, eget hendrerit felis. Aenean vulputate, augue vel mollis feugiat, justo ipsum mollis dolor, eu mollis elit neque ut ipsum. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Fusce bibendum sem quam, vulputate laoreet mi dapibus imperdiet. Sed a purus non nibh pretium aliquet. Integer eget luctus augue, vitae tincidunt magna. Ut eros enim, egestas eu nulla et, lobortis egestas arcu. Cras id ipsum ac justo lacinia rutrum. Vivamus lectus leo, ultricies sed justo at, pellentesque feugiat magna. Ut sollicitudin neque elit, vel ornare mauris commodo id. - -Duis dapibus orci et sapien finibus finibus. Mauris eleifend, lacus at vestibulum maximus, quam ligula pharetra erat, sit amet dapibus neque elit vitae neque. In bibendum sollicitudin erat, eget ultricies tortor malesuada at. Sed sit amet orci turpis. Donec feugiat ligula nibh, molestie tincidunt lectus elementum id. Donec volutpat maximus nibh, in vulputate felis posuere eu. Cras tincidunt ullamcorper lacus. Phasellus porta lorem auctor, congue magna a, commodo elit. - -Etiam auctor mi quis elit sodales, eu pulvinar arcu condimentum. Aenean imperdiet risus et dapibus tincidunt. Nullam tincidunt dictum dui, sed commodo urna rutrum id. Ut mollis libero vel elit laoreet bibendum. Quisque arcu arcu, tincidunt at ultricies id, vulputate nec metus. In tristique posuere quam sit amet volutpat. Vivamus scelerisque et nunc at dapibus. Fusce finibus libero ut ligula pretium rhoncus. Mauris non elit in arcu finibus imperdiet. Pellentesque nec massa odio. Proin rutrum mauris non sagittis efficitur. Aliquam auctor quam at dignissim faucibus. Ut eget ligula in magna posuere ultricies vitae sit amet turpis. Duis maximus odio nulla. Donec gravida sem tristique tempus scelerisque. - -Interdum et malesuada fames ac ante ipsum primis in faucibus. Fusce pharetra magna vulputate aliquet tempus. Duis id hendrerit arcu. Quisque ut ex elit. Integer velit orci, venenatis ut sapien ac, placerat porttitor dui. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nunc hendrerit cursus diam, hendrerit finibus ipsum scelerisque ut. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. - -Nulla non euismod neque. Phasellus vel sapien eu metus pulvinar rhoncus. Suspendisse eu mollis tellus, quis vestibulum tortor. Maecenas interdum dolor sed nulla fermentum maximus. Donec imperdiet ullamcorper condimentum. Nam quis nibh ante. Praesent quis tellus ut tortor pulvinar blandit sit amet ut sapien. Vestibulum est orci, pellentesque vitae tristique sit amet, tristique non felis. - -Vivamus sodales pellentesque varius. Sed vel tempus ligula. Nulla tristique nisl vel dui facilisis, ac sodales augue hendrerit. Proin augue nisi, vestibulum quis augue nec, sagittis tincidunt velit. Vestibulum euismod, nulla nec sodales faucibus, urna sapien vulputate magna, id varius metus sapien ut neque. Duis in mollis urna, in scelerisque enim. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc condimentum dictum turpis, et egestas neque dapibus eget. Quisque fringilla, dui eu venenatis eleifend, erat nibh lacinia urna, at lacinia lacus sapien eu dui. Duis eu erat ut mi lacinia convallis a sed ex. - -Fusce elit metus, tincidunt nec eleifend a, hendrerit nec ligula. Duis placerat finibus sollicitudin. In euismod porta tellus, in luctus justo bibendum bibendum. Maecenas at magna eleifend lectus tincidunt suscipit ut a ligula. Nulla tempor accumsan felis, fermentum dapibus est eleifend vitae. Mauris urna sem, fringilla at ultricies non, ultrices in arcu. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nam vehicula nunc at laoreet imperdiet. Nunc tristique ut risus id aliquet. Integer eleifend massa orci. - -Vestibulum sed ante sollicitudin nisi fringilla bibendum nec vel quam. Sed pretium augue eu ligula congue pulvinar. Donec vitae magna tincidunt, pharetra lacus id, convallis nulla. Cras viverra nisl nisl, varius convallis leo vulputate nec. Morbi at consequat dui, sed aliquet metus. Sed suscipit fermentum mollis. Maecenas nec mi sodales, tincidunt purus in, tristique mauris. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec interdum mi in velit efficitur, quis ultrices ex imperdiet. Sed vestibulum, magna ut tristique pretium, mi ipsum placerat tellus, non tempor enim augue et ex. Pellentesque eget felis quis ante sodales viverra ac sed lacus. Donec suscipit tempus massa, eget laoreet massa molestie at. - -Aenean fringilla dui non aliquet consectetur. Fusce cursus quam nec orci hendrerit faucibus. Donec consequat suscipit enim, non volutpat lectus auctor interdum. Proin lorem purus, maximus vel orci vitae, suscipit egestas turpis. Donec risus urna, congue a sem eu, aliquet placerat odio. Morbi gravida tristique turpis, quis efficitur enim. Nunc interdum gravida ipsum vel facilisis. Nunc congue finibus sollicitudin. Quisque euismod aliquet lectus et tincidunt. Curabitur ultrices sem ut mi fringilla fermentum. Morbi pretium, nisi sit amet dapibus congue, dolor enim consectetur risus, a interdum ligula odio sed odio. Quisque facilisis, mi at suscipit gravida, nunc sapien cursus justo, ut luctus odio nulla quis leo. Integer condimentum lobortis mauris, non egestas tellus lobortis sit amet. - -In sollicitudin velit ac ante vehicula, vitae varius tortor mollis. In hac habitasse platea dictumst. Quisque et orci lorem. Integer malesuada fringilla luctus. Pellentesque malesuada, mi non lobortis porttitor, ante ligula vulputate ante, nec dictum risus eros sit amet sapien. Nulla aliquam lorem libero, ac varius nulla tristique eget. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Ut pellentesque mauris orci, vel consequat mi varius a. Ut sit amet elit vulputate, lacinia metus non, fermentum nisl. Pellentesque eu nisi sed quam egestas blandit. Duis sit amet lobortis dolor. Donec consectetur sem interdum, tristique elit sit amet, sodales lacus. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Fusce id aliquam augue. Sed pretium congue risus vitae lacinia. Vestibulum non vulputate risus, ut malesuada justo. - -Sed odio elit, consectetur ac mauris quis, consequat commodo libero. Fusce sodales velit vulputate pulvinar fermentum. Donec iaculis nec nisl eget faucibus. Mauris at dictum velit. Donec fermentum lectus eu viverra volutpat. Aliquam consequat facilisis lorem, cursus consequat dui bibendum ullamcorper. Pellentesque nulla magna, imperdiet at magna et, cursus egestas enim. Nullam semper molestie lectus sit amet semper. Duis eget tincidunt est. Integer id neque risus. Integer ultricies hendrerit vestibulum. Donec blandit blandit sagittis. Nunc consectetur vitae nisi consectetur volutpat. - -Nulla id lorem fermentum, efficitur magna a, hendrerit dui. Vivamus sagittis orci gravida, bibendum quam eget, molestie est. Phasellus nec enim tincidunt, volutpat sapien non, laoreet diam. Nulla posuere enim nec porttitor lobortis. Donec auctor odio ut orci eleifend, ut eleifend purus convallis. Interdum et malesuada fames ac ante ipsum primis in faucibus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut hendrerit, purus eget viverra tincidunt, sem magna imperdiet libero, et aliquam turpis neque vitae elit. Maecenas semper varius iaculis. Cras non lorem quis quam bibendum eleifend in et libero. Curabitur at purus mauris. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus porta diam sed elit eleifend gravida. - -Nulla facilisi. Ut ultricies diam vel diam consectetur, vel porta augue molestie. Fusce interdum sapien et metus facilisis pellentesque. Nulla convallis sem at nunc vehicula facilisis. Nam ac rutrum purus. Nunc bibendum, dolor sit amet tempus ullamcorper, lorem leo tempor sem, id fringilla nunc augue scelerisque augue. Nullam sit amet rutrum nisl. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Donec sed mauris gravida eros vehicula sagittis at eget orci. Cras elementum, eros at accumsan bibendum, libero neque blandit purus, vitae vestibulum libero massa ac nibh. Integer at placerat nulla. Mauris eu eleifend orci. Aliquam consequat ligula vitae erat porta lobortis. Duis fermentum elit ac aliquet ornare. - -Mauris eget cursus tellus, eget sodales purus. Aliquam malesuada, augue id vulputate finibus, nisi ex bibendum nisl, sit amet laoreet quam urna a dolor. Nullam ultricies, sapien eu laoreet consequat, erat eros dignissim diam, ultrices sodales lectus mauris et leo. Morbi lacinia eu ante at tempus. Sed iaculis finibus magna malesuada efficitur. Donec faucibus erat sit amet elementum feugiat. Praesent a placerat nisi. Etiam lacinia gravida diam, et sollicitudin sapien tincidunt ut. - -Maecenas felis quam, tincidunt vitae venenatis scelerisque, viverra vitae odio. Phasellus enim neque, ultricies suscipit malesuada sit amet, vehicula sit amet purus. Nulla placerat sit amet dui vel tincidunt. Nam quis neque vel magna commodo egestas. Vestibulum sagittis rutrum lorem ut congue. Maecenas vel ultrices tellus. Donec efficitur, urna ac consequat iaculis, lorem felis pharetra eros, eget faucibus orci lectus sit amet arcu. - -Ut a tempus nisi. Nulla facilisi. Praesent vulputate maximus mi et dapibus. Sed sit amet libero ac augue hendrerit efficitur in a sapien. Mauris placerat velit sit amet tellus sollicitudin faucibus. Donec egestas a magna ac suscipit. Duis enim sapien, mollis sed egestas et, vestibulum vel leo. - -Proin quis dapibus dui. Donec eu tincidunt nunc. Vivamus eget purus consectetur, maximus ante vitae, tincidunt elit. Aenean mattis dolor a gravida aliquam. Praesent quis tellus id sem maximus vulputate nec sed nulla. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur metus nulla, volutpat volutpat est eu, hendrerit congue erat. Aliquam sollicitudin augue ante. Sed sollicitudin, magna eu consequat elementum, mi augue ullamcorper felis, molestie imperdiet erat metus iaculis est. Proin ac tortor nisi. Pellentesque quis nisi risus. Integer enim sapien, tincidunt quis tortor id, accumsan venenatis mi. Nulla facilisi. - -Cras pretium sit amet quam congue maximus. Morbi lacus libero, imperdiet commodo massa sed, scelerisque placerat libero. Cras nisl nisi, consectetur sed bibendum eu, venenatis at enim. Proin sodales justo at quam aliquam, a consectetur mi ornare. Donec porta ac est sit amet efficitur. Suspendisse vestibulum tortor id neque imperdiet, id lacinia risus vehicula. Phasellus ac eleifend purus. Mauris vel gravida ante. Aliquam vitae lobortis risus. Sed vehicula consectetur tincidunt. Nam et justo vitae purus molestie consequat. Pellentesque ipsum ex, convallis quis blandit non, gravida et urna. Donec diam ligula amet. diff --git a/pkg/sentry/fs/ext/assets/file.txt b/pkg/sentry/fs/ext/assets/file.txt deleted file mode 100644 index 980a0d5f1..000000000 --- a/pkg/sentry/fs/ext/assets/file.txt +++ /dev/null @@ -1 +0,0 @@ -Hello World! diff --git a/pkg/sentry/fs/ext/assets/symlink.txt b/pkg/sentry/fs/ext/assets/symlink.txt deleted file mode 120000 index 4c330738c..000000000 --- a/pkg/sentry/fs/ext/assets/symlink.txt +++ /dev/null @@ -1 +0,0 @@ -file.txt
\ No newline at end of file diff --git a/pkg/sentry/fs/ext/assets/tiny.ext2 b/pkg/sentry/fs/ext/assets/tiny.ext2 Binary files differdeleted file mode 100644 index 381ade9bf..000000000 --- a/pkg/sentry/fs/ext/assets/tiny.ext2 +++ /dev/null diff --git a/pkg/sentry/fs/ext/assets/tiny.ext3 b/pkg/sentry/fs/ext/assets/tiny.ext3 Binary files differdeleted file mode 100644 index 0e97a324c..000000000 --- a/pkg/sentry/fs/ext/assets/tiny.ext3 +++ /dev/null diff --git a/pkg/sentry/fs/ext/assets/tiny.ext4 b/pkg/sentry/fs/ext/assets/tiny.ext4 Binary files differdeleted file mode 100644 index a6859736d..000000000 --- a/pkg/sentry/fs/ext/assets/tiny.ext4 +++ /dev/null diff --git a/pkg/sentry/fs/ext/block_map_file.go b/pkg/sentry/fs/ext/block_map_file.go deleted file mode 100644 index cea89bcd9..000000000 --- a/pkg/sentry/fs/ext/block_map_file.go +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "io" - "math" - - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/syserror" -) - -const ( - // numDirectBlks is the number of direct blocks in ext block map inodes. - numDirectBlks = 12 -) - -// blockMapFile is a type of regular file which uses direct/indirect block -// addressing to store file data. This was deprecated in ext4. -type blockMapFile struct { - regFile regularFile - - // directBlks are the direct blocks numbers. The physical blocks pointed by - // these holds file data. Contains file blocks 0 to 11. - directBlks [numDirectBlks]uint32 - - // indirectBlk is the physical block which contains (blkSize/4) direct block - // numbers (as uint32 integers). - indirectBlk uint32 - - // doubleIndirectBlk is the physical block which contains (blkSize/4) indirect - // block numbers (as uint32 integers). - doubleIndirectBlk uint32 - - // tripleIndirectBlk is the physical block which contains (blkSize/4) doubly - // indirect block numbers (as uint32 integers). - tripleIndirectBlk uint32 - - // coverage at (i)th index indicates the amount of file data a node at - // height (i) covers. Height 0 is the direct block. - coverage [4]uint64 -} - -// Compiles only if blockMapFile implements io.ReaderAt. -var _ io.ReaderAt = (*blockMapFile)(nil) - -// newBlockMapFile is the blockMapFile constructor. It initializes the file to -// physical blocks map with (at most) the first 12 (direct) blocks. -func newBlockMapFile(regFile regularFile) (*blockMapFile, error) { - file := &blockMapFile{regFile: regFile} - file.regFile.impl = file - - for i := uint(0); i < 4; i++ { - file.coverage[i] = getCoverage(regFile.inode.blkSize, i) - } - - blkMap := regFile.inode.diskInode.Data() - binary.Unmarshal(blkMap[:numDirectBlks*4], binary.LittleEndian, &file.directBlks) - binary.Unmarshal(blkMap[numDirectBlks*4:(numDirectBlks+1)*4], binary.LittleEndian, &file.indirectBlk) - binary.Unmarshal(blkMap[(numDirectBlks+1)*4:(numDirectBlks+2)*4], binary.LittleEndian, &file.doubleIndirectBlk) - binary.Unmarshal(blkMap[(numDirectBlks+2)*4:(numDirectBlks+3)*4], binary.LittleEndian, &file.tripleIndirectBlk) - return file, nil -} - -// ReadAt implements io.ReaderAt.ReadAt. -func (f *blockMapFile) ReadAt(dst []byte, off int64) (int, error) { - if len(dst) == 0 { - return 0, nil - } - - if off < 0 { - return 0, syserror.EINVAL - } - - offset := uint64(off) - size := f.regFile.inode.diskInode.Size() - if offset >= size { - return 0, io.EOF - } - - // dirBlksEnd is the file offset until which direct blocks cover file data. - // Direct blocks cover 0 <= file offset < dirBlksEnd. - dirBlksEnd := numDirectBlks * f.coverage[0] - - // indirBlkEnd is the file offset until which the indirect block covers file - // data. The indirect block covers dirBlksEnd <= file offset < indirBlkEnd. - indirBlkEnd := dirBlksEnd + f.coverage[1] - - // doubIndirBlkEnd is the file offset until which the double indirect block - // covers file data. The double indirect block covers the range - // indirBlkEnd <= file offset < doubIndirBlkEnd. - doubIndirBlkEnd := indirBlkEnd + f.coverage[2] - - read := 0 - toRead := len(dst) - if uint64(toRead)+offset > size { - toRead = int(size - offset) - } - for read < toRead { - var err error - var curR int - - // Figure out which block to delegate the read to. - switch { - case offset < dirBlksEnd: - // Direct block. - curR, err = f.read(f.directBlks[offset/f.regFile.inode.blkSize], offset%f.regFile.inode.blkSize, 0, dst[read:]) - case offset < indirBlkEnd: - // Indirect block. - curR, err = f.read(f.indirectBlk, offset-dirBlksEnd, 1, dst[read:]) - case offset < doubIndirBlkEnd: - // Doubly indirect block. - curR, err = f.read(f.doubleIndirectBlk, offset-indirBlkEnd, 2, dst[read:]) - default: - // Triply indirect block. - curR, err = f.read(f.tripleIndirectBlk, offset-doubIndirBlkEnd, 3, dst[read:]) - } - - read += curR - offset += uint64(curR) - if err != nil { - return read, err - } - } - - if read < len(dst) { - return read, io.EOF - } - return read, nil -} - -// read is the recursive step of the ReadAt function. It relies on knowing the -// current node's location on disk (curPhyBlk) and its height in the block map -// tree. A height of 0 shows that the current node is actually holding file -// data. relFileOff tells the offset from which we need to start to reading -// under the current node. It is completely relative to the current node. -func (f *blockMapFile) read(curPhyBlk uint32, relFileOff uint64, height uint, dst []byte) (int, error) { - curPhyBlkOff := int64(curPhyBlk) * int64(f.regFile.inode.blkSize) - if height == 0 { - toRead := int(f.regFile.inode.blkSize - relFileOff) - if len(dst) < toRead { - toRead = len(dst) - } - - n, _ := f.regFile.inode.dev.ReadAt(dst[:toRead], curPhyBlkOff+int64(relFileOff)) - if n < toRead { - return n, syserror.EIO - } - return n, nil - } - - childCov := f.coverage[height-1] - startIdx := relFileOff / childCov - endIdx := f.regFile.inode.blkSize / 4 // This is exclusive. - wantEndIdx := (relFileOff + uint64(len(dst))) / childCov - wantEndIdx++ // Make this exclusive. - if wantEndIdx < endIdx { - endIdx = wantEndIdx - } - - read := 0 - curChildOff := relFileOff % childCov - for i := startIdx; i < endIdx; i++ { - var childPhyBlk uint32 - err := readFromDisk(f.regFile.inode.dev, curPhyBlkOff+int64(i*4), &childPhyBlk) - if err != nil { - return read, err - } - - n, err := f.read(childPhyBlk, curChildOff, height-1, dst[read:]) - read += n - if err != nil { - return read, err - } - - curChildOff = 0 - } - - return read, nil -} - -// getCoverage returns the number of bytes a node at the given height covers. -// Height 0 is the file data block itself. Height 1 is the indirect block. -// -// Formula: blkSize * ((blkSize / 4)^height) -func getCoverage(blkSize uint64, height uint) uint64 { - return blkSize * uint64(math.Pow(float64(blkSize/4), float64(height))) -} diff --git a/pkg/sentry/fs/ext/block_map_test.go b/pkg/sentry/fs/ext/block_map_test.go deleted file mode 100644 index f8dd6bf9f..000000000 --- a/pkg/sentry/fs/ext/block_map_test.go +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "bytes" - "math/rand" - "testing" - - "github.com/google/go-cmp/cmp" - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" -) - -// These consts are for mocking the block map tree. -const ( - mockBMBlkSize = uint32(16) - mockBMDiskSize = 2500 -) - -// TestBlockMapReader stress tests block map reader functionality. It performs -// random length reads from all possible positions in the block map structure. -func TestBlockMapReader(t *testing.T) { - mockBMFile, want := blockMapSetUp(t) - n := len(want) - - for from := 0; from < n; from++ { - got := make([]byte, n-from) - - if read, err := mockBMFile.ReadAt(got, int64(from)); err != nil { - t.Fatalf("file read operation from offset %d to %d only read %d bytes: %v", from, n, read, err) - } - - if diff := cmp.Diff(got, want[from:]); diff != "" { - t.Fatalf("file data from offset %d to %d mismatched (-want +got):\n%s", from, n, diff) - } - } -} - -// blkNumGen is a number generator which gives block numbers for building the -// block map file on disk. It gives unique numbers in a random order which -// facilitates in creating an extremely fragmented filesystem. -type blkNumGen struct { - nums []uint32 -} - -// newBlkNumGen is the blkNumGen constructor. -func newBlkNumGen() *blkNumGen { - blkNums := &blkNumGen{} - lim := mockBMDiskSize / mockBMBlkSize - blkNums.nums = make([]uint32, lim) - for i := uint32(0); i < lim; i++ { - blkNums.nums[i] = i - } - - rand.Shuffle(int(lim), func(i, j int) { - blkNums.nums[i], blkNums.nums[j] = blkNums.nums[j], blkNums.nums[i] - }) - return blkNums -} - -// next returns the next random block number. -func (n *blkNumGen) next() uint32 { - ret := n.nums[0] - n.nums = n.nums[1:] - return ret -} - -// blockMapSetUp creates a mock disk and a block map file. It initializes the -// block map file with 12 direct block, 1 indirect block, 1 double indirect -// block and 1 triple indirect block (basically fill it till the rim). It -// initializes the disk to reflect the inode. Also returns the file data that -// the inode covers and that is written to disk. -func blockMapSetUp(t *testing.T) (*blockMapFile, []byte) { - mockDisk := make([]byte, mockBMDiskSize) - regFile := regularFile{ - inode: inode{ - diskInode: &disklayout.InodeNew{ - InodeOld: disklayout.InodeOld{ - SizeLo: getMockBMFileFize(), - }, - }, - dev: bytes.NewReader(mockDisk), - blkSize: uint64(mockBMBlkSize), - }, - } - - var fileData []byte - blkNums := newBlkNumGen() - var data []byte - - // Write the direct blocks. - for i := 0; i < numDirectBlks; i++ { - curBlkNum := blkNums.next() - data = binary.Marshal(data, binary.LittleEndian, curBlkNum) - fileData = append(fileData, writeFileDataToBlock(mockDisk, curBlkNum, 0, blkNums)...) - } - - // Write to indirect block. - indirectBlk := blkNums.next() - data = binary.Marshal(data, binary.LittleEndian, indirectBlk) - fileData = append(fileData, writeFileDataToBlock(mockDisk, indirectBlk, 1, blkNums)...) - - // Write to indirect block. - doublyIndirectBlk := blkNums.next() - data = binary.Marshal(data, binary.LittleEndian, doublyIndirectBlk) - fileData = append(fileData, writeFileDataToBlock(mockDisk, doublyIndirectBlk, 2, blkNums)...) - - // Write to indirect block. - triplyIndirectBlk := blkNums.next() - data = binary.Marshal(data, binary.LittleEndian, triplyIndirectBlk) - fileData = append(fileData, writeFileDataToBlock(mockDisk, triplyIndirectBlk, 3, blkNums)...) - - copy(regFile.inode.diskInode.Data(), data) - - mockFile, err := newBlockMapFile(regFile) - if err != nil { - t.Fatalf("newBlockMapFile failed: %v", err) - } - return mockFile, fileData -} - -// writeFileDataToBlock writes random bytes to the block on disk. -func writeFileDataToBlock(disk []byte, blkNum uint32, height uint, blkNums *blkNumGen) []byte { - if height == 0 { - start := blkNum * mockBMBlkSize - end := start + mockBMBlkSize - rand.Read(disk[start:end]) - return disk[start:end] - } - - var fileData []byte - for off := blkNum * mockBMBlkSize; off < (blkNum+1)*mockBMBlkSize; off += 4 { - curBlkNum := blkNums.next() - copy(disk[off:off+4], binary.Marshal(nil, binary.LittleEndian, curBlkNum)) - fileData = append(fileData, writeFileDataToBlock(disk, curBlkNum, height-1, blkNums)...) - } - return fileData -} - -// getMockBMFileFize gets the size of the mock block map file which is used for -// testing. -func getMockBMFileFize() uint32 { - return uint32(numDirectBlks*getCoverage(uint64(mockBMBlkSize), 0) + getCoverage(uint64(mockBMBlkSize), 1) + getCoverage(uint64(mockBMBlkSize), 2) + getCoverage(uint64(mockBMBlkSize), 3)) -} diff --git a/pkg/sentry/fs/ext/dentry.go b/pkg/sentry/fs/ext/dentry.go deleted file mode 100644 index 054fb42b6..000000000 --- a/pkg/sentry/fs/ext/dentry.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// dentry implements vfs.DentryImpl. -type dentry struct { - vfsd vfs.Dentry - - // inode is the inode represented by this dentry. Multiple Dentries may - // share a single non-directory Inode (with hard links). inode is - // immutable. - inode *inode -} - -// Compiles only if dentry implements vfs.DentryImpl. -var _ vfs.DentryImpl = (*dentry)(nil) - -// newDentry is the dentry constructor. -func newDentry(in *inode) *dentry { - d := &dentry{ - inode: in, - } - d.vfsd.Init(d) - return d -} - -// IncRef implements vfs.DentryImpl.IncRef. -func (d *dentry) IncRef(vfsfs *vfs.Filesystem) { - d.inode.incRef() -} - -// TryIncRef implements vfs.DentryImpl.TryIncRef. -func (d *dentry) TryIncRef(vfsfs *vfs.Filesystem) bool { - return d.inode.tryIncRef() -} - -// DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef(vfsfs *vfs.Filesystem) { - d.inode.decRef(vfsfs.Impl().(*filesystem)) -} diff --git a/pkg/sentry/fs/ext/directory.go b/pkg/sentry/fs/ext/directory.go deleted file mode 100644 index 1ba8bf54c..000000000 --- a/pkg/sentry/fs/ext/directory.go +++ /dev/null @@ -1,308 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "sync" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// directory represents a directory inode. It holds the childList in memory. -type directory struct { - inode inode - - // mu serializes the changes to childList. - // Lock Order (outermost locks must be taken first): - // directory.mu - // filesystem.mu - mu sync.Mutex - - // childList is a list containing (1) child dirents and (2) fake dirents - // (with diskDirent == nil) that represent the iteration position of - // directoryFDs. childList is used to support directoryFD.IterDirents() - // efficiently. childList is protected by mu. - childList direntList - - // childMap maps the child's filename to the dirent structure stored in - // childList. This adds some data replication but helps in faster path - // traversal. For consistency, key == childMap[key].diskDirent.FileName(). - // Immutable. - childMap map[string]*dirent -} - -// newDirectroy is the directory constructor. -func newDirectroy(inode inode, newDirent bool) (*directory, error) { - file := &directory{inode: inode, childMap: make(map[string]*dirent)} - file.inode.impl = file - - // Initialize childList by reading dirents from the underlying file. - if inode.diskInode.Flags().Index { - // TODO(b/134676337): Support hash tree directories. Currently only the '.' - // and '..' entries are read in. - - // Users cannot navigate this hash tree directory yet. - log.Warningf("hash tree directory being used which is unsupported") - return file, nil - } - - // The dirents are organized in a linear array in the file data. - // Extract the file data and decode the dirents. - regFile, err := newRegularFile(inode) - if err != nil { - return nil, err - } - - // buf is used as scratch space for reading in dirents from disk and - // unmarshalling them into dirent structs. - buf := make([]byte, disklayout.DirentSize) - size := inode.diskInode.Size() - for off, inc := uint64(0), uint64(0); off < size; off += inc { - toRead := size - off - if toRead > disklayout.DirentSize { - toRead = disklayout.DirentSize - } - if n, err := regFile.impl.ReadAt(buf[:toRead], int64(off)); uint64(n) < toRead { - return nil, err - } - - var curDirent dirent - if newDirent { - curDirent.diskDirent = &disklayout.DirentNew{} - } else { - curDirent.diskDirent = &disklayout.DirentOld{} - } - binary.Unmarshal(buf, binary.LittleEndian, curDirent.diskDirent) - - if curDirent.diskDirent.Inode() != 0 && len(curDirent.diskDirent.FileName()) != 0 { - // Inode number and name length fields being set to 0 is used to indicate - // an unused dirent. - file.childList.PushBack(&curDirent) - file.childMap[curDirent.diskDirent.FileName()] = &curDirent - } - - // The next dirent is placed exactly after this dirent record on disk. - inc = uint64(curDirent.diskDirent.RecordSize()) - } - - return file, nil -} - -func (i *inode) isDir() bool { - _, ok := i.impl.(*directory) - return ok -} - -// dirent is the directory.childList node. -type dirent struct { - diskDirent disklayout.Dirent - - // direntEntry links dirents into their parent directory.childList. - direntEntry -} - -// directoryFD represents a directory file description. It implements -// vfs.FileDescriptionImpl. -type directoryFD struct { - fileDescription - vfs.DirectoryFileDescriptionDefaultImpl - - // Protected by directory.mu. - iter *dirent - off int64 -} - -// Compiles only if directoryFD implements vfs.FileDescriptionImpl. -var _ vfs.FileDescriptionImpl = (*directoryFD)(nil) - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *directoryFD) Release() { - if fd.iter == nil { - return - } - - dir := fd.inode().impl.(*directory) - dir.mu.Lock() - dir.childList.Remove(fd.iter) - dir.mu.Unlock() - fd.iter = nil -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - extfs := fd.filesystem() - dir := fd.inode().impl.(*directory) - - dir.mu.Lock() - defer dir.mu.Unlock() - - // Ensure that fd.iter exists and is not linked into dir.childList. - var child *dirent - if fd.iter == nil { - // Start iteration at the beginning of dir. - child = dir.childList.Front() - fd.iter = &dirent{} - } else { - // Continue iteration from where we left off. - child = fd.iter.Next() - dir.childList.Remove(fd.iter) - } - for ; child != nil; child = child.Next() { - // Skip other directoryFD iterators. - if child.diskDirent != nil { - childType, ok := child.diskDirent.FileType() - if !ok { - // We will need to read the inode off disk. Do not increment - // ref count here because this inode is not being added to the - // dentry tree. - extfs.mu.Lock() - childInode, err := extfs.getOrCreateInodeLocked(child.diskDirent.Inode()) - extfs.mu.Unlock() - if err != nil { - // Usage of the file description after the error is - // undefined. This implementation would continue reading - // from the next dirent. - fd.off++ - dir.childList.InsertAfter(child, fd.iter) - return err - } - childType = fs.ToInodeType(childInode.diskInode.Mode().FileType()) - } - - if !cb.Handle(vfs.Dirent{ - Name: child.diskDirent.FileName(), - Type: fs.ToDirentType(childType), - Ino: uint64(child.diskDirent.Inode()), - Off: fd.off, - }) { - dir.childList.InsertBefore(child, fd.iter) - return nil - } - fd.off++ - } - } - dir.childList.PushBack(fd.iter) - return nil -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - if whence != linux.SEEK_SET && whence != linux.SEEK_CUR { - return 0, syserror.EINVAL - } - - dir := fd.inode().impl.(*directory) - - dir.mu.Lock() - defer dir.mu.Unlock() - - // Find resulting offset. - if whence == linux.SEEK_CUR { - offset += fd.off - } - - if offset < 0 { - // lseek(2) specifies that EINVAL should be returned if the resulting offset - // is negative. - return 0, syserror.EINVAL - } - - n := int64(len(dir.childMap)) - realWantOff := offset - if realWantOff > n { - realWantOff = n - } - realCurOff := fd.off - if realCurOff > n { - realCurOff = n - } - - // Ensure that fd.iter exists and is linked into dir.childList so we can - // intelligently seek from the optimal position. - if fd.iter == nil { - fd.iter = &dirent{} - dir.childList.PushFront(fd.iter) - } - - // Guess that iterating from the current position is optimal. - child := fd.iter - diff := realWantOff - realCurOff // Shows direction and magnitude of travel. - - // See if starting from the beginning or end is better. - abDiff := diff - if diff < 0 { - abDiff = -diff - } - if abDiff > realWantOff { - // Starting from the beginning is best. - child = dir.childList.Front() - diff = realWantOff - } else if abDiff > (n - realWantOff) { - // Starting from the end is best. - child = dir.childList.Back() - // (n - 1) because the last non-nil dirent represents the (n-1)th offset. - diff = realWantOff - (n - 1) - } - - for child != nil { - // Skip other directoryFD iterators. - if child.diskDirent != nil { - if diff == 0 { - if child != fd.iter { - dir.childList.Remove(fd.iter) - dir.childList.InsertBefore(child, fd.iter) - } - - fd.off = offset - return offset, nil - } - - if diff < 0 { - diff++ - child = child.Prev() - } else { - diff-- - child = child.Next() - } - continue - } - - if diff < 0 { - child = child.Prev() - } else { - child = child.Next() - } - } - - // Reaching here indicates that the offset is beyond the end of the childList. - dir.childList.Remove(fd.iter) - dir.childList.PushBack(fd.iter) - fd.off = offset - return offset, nil -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *directoryFD) ConfigureMMap(ctx context.Context, opts memmap.MMapOpts) error { - // mmap(2) specifies that EACCESS should be returned for non-regular file fds. - return syserror.EACCES -} diff --git a/pkg/sentry/fs/ext/disklayout/BUILD b/pkg/sentry/fs/ext/disklayout/BUILD deleted file mode 100644 index dde15110d..000000000 --- a/pkg/sentry/fs/ext/disklayout/BUILD +++ /dev/null @@ -1,48 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "disklayout", - srcs = [ - "block_group.go", - "block_group_32.go", - "block_group_64.go", - "dirent.go", - "dirent_new.go", - "dirent_old.go", - "disklayout.go", - "extent.go", - "inode.go", - "inode_new.go", - "inode_old.go", - "superblock.go", - "superblock_32.go", - "superblock_64.go", - "superblock_old.go", - "test_utils.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - ], -) - -go_test( - name = "disklayout_test", - size = "small", - srcs = [ - "block_group_test.go", - "dirent_test.go", - "extent_test.go", - "inode_test.go", - "superblock_test.go", - ], - embed = [":disklayout"], - deps = ["//pkg/sentry/kernel/time"], -) diff --git a/pkg/sentry/fs/ext/disklayout/block_group.go b/pkg/sentry/fs/ext/disklayout/block_group.go deleted file mode 100644 index ad6f4fef8..000000000 --- a/pkg/sentry/fs/ext/disklayout/block_group.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// BlockGroup represents a Linux ext block group descriptor. An ext file system -// is split into a series of block groups. This provides an access layer to -// information needed to access and use a block group. -// -// Location: -// - The block group descriptor table is always placed in the blocks -// immediately after the block containing the superblock. -// - The 1st block group descriptor in the original table is in the -// (sb.FirstDataBlock() + 1)th block. -// - See SuperBlock docs to see where the block group descriptor table is -// replicated. -// - sb.BgDescSize() must be used as the block group descriptor entry size -// while reading the table from disk. -// -// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#block-group-descriptors. -type BlockGroup interface { - // InodeTable returns the absolute block number of the block containing the - // inode table. This points to an array of Inode structs. Inode tables are - // statically allocated at mkfs time. The superblock records the number of - // inodes per group (length of this table) and the size of each inode struct. - InodeTable() uint64 - - // BlockBitmap returns the absolute block number of the block containing the - // block bitmap. This bitmap tracks the usage of data blocks within this block - // group and has its own checksum. - BlockBitmap() uint64 - - // InodeBitmap returns the absolute block number of the block containing the - // inode bitmap. This bitmap tracks the usage of this group's inode table - // entries and has its own checksum. - InodeBitmap() uint64 - - // ExclusionBitmap returns the absolute block number of the snapshot exclusion - // bitmap. - ExclusionBitmap() uint64 - - // FreeBlocksCount returns the number of free blocks in the group. - FreeBlocksCount() uint32 - - // FreeInodesCount returns the number of free inodes in the group. - FreeInodesCount() uint32 - - // DirectoryCount returns the number of inodes that represent directories - // under this block group. - DirectoryCount() uint32 - - // UnusedInodeCount returns the number of unused inodes beyond the last used - // inode in this group's inode table. As a result, we needn’t scan past the - // (InodesPerGroup - UnusedInodeCount())th entry in the inode table. - UnusedInodeCount() uint32 - - // BlockBitmapChecksum returns the block bitmap checksum. This is calculated - // using crc32c(FS UUID + group number + entire bitmap). - BlockBitmapChecksum() uint32 - - // InodeBitmapChecksum returns the inode bitmap checksum. This is calculated - // using crc32c(FS UUID + group number + entire bitmap). - InodeBitmapChecksum() uint32 - - // Checksum returns this block group's checksum. - // - // If SbMetadataCsum feature is set: - // - checksum is crc32c(FS UUID + group number + group descriptor - // structure) & 0xFFFF. - // - // If SbGdtCsum feature is set: - // - checksum is crc16(FS UUID + group number + group descriptor - // structure). - // - // SbMetadataCsum and SbGdtCsum should not be both set. - // If they are, Linux warns and asks to run fsck. - Checksum() uint16 - - // Flags returns BGFlags which represents the block group flags. - Flags() BGFlags -} - -// These are the different block group flags. -const ( - // BgInodeUninit indicates that inode table and bitmap are not initialized. - BgInodeUninit uint16 = 0x1 - - // BgBlockUninit indicates that block bitmap is not initialized. - BgBlockUninit uint16 = 0x2 - - // BgInodeZeroed indicates that inode table is zeroed. - BgInodeZeroed uint16 = 0x4 -) - -// BGFlags represents all the different combinations of block group flags. -type BGFlags struct { - InodeUninit bool - BlockUninit bool - InodeZeroed bool -} - -// ToInt converts a BGFlags struct back to its 16-bit representation. -func (f BGFlags) ToInt() uint16 { - var res uint16 - - if f.InodeUninit { - res |= BgInodeUninit - } - if f.BlockUninit { - res |= BgBlockUninit - } - if f.InodeZeroed { - res |= BgInodeZeroed - } - - return res -} - -// BGFlagsFromInt converts the 16-bit flag representation to a BGFlags struct. -func BGFlagsFromInt(flags uint16) BGFlags { - return BGFlags{ - InodeUninit: flags&BgInodeUninit > 0, - BlockUninit: flags&BgBlockUninit > 0, - InodeZeroed: flags&BgInodeZeroed > 0, - } -} diff --git a/pkg/sentry/fs/ext/disklayout/block_group_32.go b/pkg/sentry/fs/ext/disklayout/block_group_32.go deleted file mode 100644 index 3e16c76db..000000000 --- a/pkg/sentry/fs/ext/disklayout/block_group_32.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// BlockGroup32Bit emulates the first half of struct ext4_group_desc in -// fs/ext4/ext4.h. It is the block group descriptor struct for ext2, ext3 and -// 32-bit ext4 filesystems. It implements BlockGroup interface. -type BlockGroup32Bit struct { - BlockBitmapLo uint32 - InodeBitmapLo uint32 - InodeTableLo uint32 - FreeBlocksCountLo uint16 - FreeInodesCountLo uint16 - UsedDirsCountLo uint16 - FlagsRaw uint16 - ExcludeBitmapLo uint32 - BlockBitmapChecksumLo uint16 - InodeBitmapChecksumLo uint16 - ItableUnusedLo uint16 - ChecksumRaw uint16 -} - -// Compiles only if BlockGroup32Bit implements BlockGroup. -var _ BlockGroup = (*BlockGroup32Bit)(nil) - -// InodeTable implements BlockGroup.InodeTable. -func (bg *BlockGroup32Bit) InodeTable() uint64 { return uint64(bg.InodeTableLo) } - -// BlockBitmap implements BlockGroup.BlockBitmap. -func (bg *BlockGroup32Bit) BlockBitmap() uint64 { return uint64(bg.BlockBitmapLo) } - -// InodeBitmap implements BlockGroup.InodeBitmap. -func (bg *BlockGroup32Bit) InodeBitmap() uint64 { return uint64(bg.InodeBitmapLo) } - -// ExclusionBitmap implements BlockGroup.ExclusionBitmap. -func (bg *BlockGroup32Bit) ExclusionBitmap() uint64 { return uint64(bg.ExcludeBitmapLo) } - -// FreeBlocksCount implements BlockGroup.FreeBlocksCount. -func (bg *BlockGroup32Bit) FreeBlocksCount() uint32 { return uint32(bg.FreeBlocksCountLo) } - -// FreeInodesCount implements BlockGroup.FreeInodesCount. -func (bg *BlockGroup32Bit) FreeInodesCount() uint32 { return uint32(bg.FreeInodesCountLo) } - -// DirectoryCount implements BlockGroup.DirectoryCount. -func (bg *BlockGroup32Bit) DirectoryCount() uint32 { return uint32(bg.UsedDirsCountLo) } - -// UnusedInodeCount implements BlockGroup.UnusedInodeCount. -func (bg *BlockGroup32Bit) UnusedInodeCount() uint32 { return uint32(bg.ItableUnusedLo) } - -// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum. -func (bg *BlockGroup32Bit) BlockBitmapChecksum() uint32 { return uint32(bg.BlockBitmapChecksumLo) } - -// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum. -func (bg *BlockGroup32Bit) InodeBitmapChecksum() uint32 { return uint32(bg.InodeBitmapChecksumLo) } - -// Checksum implements BlockGroup.Checksum. -func (bg *BlockGroup32Bit) Checksum() uint16 { return bg.ChecksumRaw } - -// Flags implements BlockGroup.Flags. -func (bg *BlockGroup32Bit) Flags() BGFlags { return BGFlagsFromInt(bg.FlagsRaw) } diff --git a/pkg/sentry/fs/ext/disklayout/block_group_64.go b/pkg/sentry/fs/ext/disklayout/block_group_64.go deleted file mode 100644 index 9a809197a..000000000 --- a/pkg/sentry/fs/ext/disklayout/block_group_64.go +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// BlockGroup64Bit emulates struct ext4_group_desc in fs/ext4/ext4.h. -// It is the block group descriptor struct for 64-bit ext4 filesystems. -// It implements BlockGroup interface. It is an extension of the 32-bit -// version of BlockGroup. -type BlockGroup64Bit struct { - // We embed the 32-bit struct here because 64-bit version is just an extension - // of the 32-bit version. - BlockGroup32Bit - - // 64-bit specific fields. - BlockBitmapHi uint32 - InodeBitmapHi uint32 - InodeTableHi uint32 - FreeBlocksCountHi uint16 - FreeInodesCountHi uint16 - UsedDirsCountHi uint16 - ItableUnusedHi uint16 - ExcludeBitmapHi uint32 - BlockBitmapChecksumHi uint16 - InodeBitmapChecksumHi uint16 - _ uint32 // Padding to 64 bytes. -} - -// Compiles only if BlockGroup64Bit implements BlockGroup. -var _ BlockGroup = (*BlockGroup64Bit)(nil) - -// Methods to override. Checksum() and Flags() are not overridden. - -// InodeTable implements BlockGroup.InodeTable. -func (bg *BlockGroup64Bit) InodeTable() uint64 { - return (uint64(bg.InodeTableHi) << 32) | uint64(bg.InodeTableLo) -} - -// BlockBitmap implements BlockGroup.BlockBitmap. -func (bg *BlockGroup64Bit) BlockBitmap() uint64 { - return (uint64(bg.BlockBitmapHi) << 32) | uint64(bg.BlockBitmapLo) -} - -// InodeBitmap implements BlockGroup.InodeBitmap. -func (bg *BlockGroup64Bit) InodeBitmap() uint64 { - return (uint64(bg.InodeBitmapHi) << 32) | uint64(bg.InodeBitmapLo) -} - -// ExclusionBitmap implements BlockGroup.ExclusionBitmap. -func (bg *BlockGroup64Bit) ExclusionBitmap() uint64 { - return (uint64(bg.ExcludeBitmapHi) << 32) | uint64(bg.ExcludeBitmapLo) -} - -// FreeBlocksCount implements BlockGroup.FreeBlocksCount. -func (bg *BlockGroup64Bit) FreeBlocksCount() uint32 { - return (uint32(bg.FreeBlocksCountHi) << 16) | uint32(bg.FreeBlocksCountLo) -} - -// FreeInodesCount implements BlockGroup.FreeInodesCount. -func (bg *BlockGroup64Bit) FreeInodesCount() uint32 { - return (uint32(bg.FreeInodesCountHi) << 16) | uint32(bg.FreeInodesCountLo) -} - -// DirectoryCount implements BlockGroup.DirectoryCount. -func (bg *BlockGroup64Bit) DirectoryCount() uint32 { - return (uint32(bg.UsedDirsCountHi) << 16) | uint32(bg.UsedDirsCountLo) -} - -// UnusedInodeCount implements BlockGroup.UnusedInodeCount. -func (bg *BlockGroup64Bit) UnusedInodeCount() uint32 { - return (uint32(bg.ItableUnusedHi) << 16) | uint32(bg.ItableUnusedLo) -} - -// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum. -func (bg *BlockGroup64Bit) BlockBitmapChecksum() uint32 { - return (uint32(bg.BlockBitmapChecksumHi) << 16) | uint32(bg.BlockBitmapChecksumLo) -} - -// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum. -func (bg *BlockGroup64Bit) InodeBitmapChecksum() uint32 { - return (uint32(bg.InodeBitmapChecksumHi) << 16) | uint32(bg.InodeBitmapChecksumLo) -} diff --git a/pkg/sentry/fs/ext/disklayout/block_group_test.go b/pkg/sentry/fs/ext/disklayout/block_group_test.go deleted file mode 100644 index 0ef4294c0..000000000 --- a/pkg/sentry/fs/ext/disklayout/block_group_test.go +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "testing" -) - -// TestBlockGroupSize tests that the block group descriptor structs are of the -// correct size. -func TestBlockGroupSize(t *testing.T) { - assertSize(t, BlockGroup32Bit{}, 32) - assertSize(t, BlockGroup64Bit{}, 64) -} diff --git a/pkg/sentry/fs/ext/disklayout/dirent.go b/pkg/sentry/fs/ext/disklayout/dirent.go deleted file mode 100644 index 417b6cf65..000000000 --- a/pkg/sentry/fs/ext/disklayout/dirent.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -const ( - // MaxFileName is the maximum length of an ext fs file's name. - MaxFileName = 255 - - // DirentSize is the size of ext dirent structures. - DirentSize = 263 -) - -var ( - // inodeTypeByFileType maps ext4 file types to vfs inode types. - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#ftype. - inodeTypeByFileType = map[uint8]fs.InodeType{ - 0: fs.Anonymous, - 1: fs.RegularFile, - 2: fs.Directory, - 3: fs.CharacterDevice, - 4: fs.BlockDevice, - 5: fs.Pipe, - 6: fs.Socket, - 7: fs.Symlink, - } -) - -// The Dirent interface should be implemented by structs representing ext -// directory entries. These are for the linear classical directories which -// just store a list of dirent structs. A directory is a series of data blocks -// where is each data block contains a linear array of dirents. The last entry -// of the block has a record size that takes it to the end of the block. The -// end of the directory is when you read dirInode.Size() bytes from the blocks. -// -// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#linear-classic-directories. -type Dirent interface { - // Inode returns the absolute inode number of the underlying inode. - // Inode number 0 signifies an unused dirent. - Inode() uint32 - - // RecordSize returns the record length of this dirent on disk. The next - // dirent in the dirent list should be read after these many bytes from - // the current dirent. Must be a multiple of 4. - RecordSize() uint16 - - // FileName returns the name of the file. Can be at most 255 is length. - FileName() string - - // FileType returns the inode type of the underlying inode. This is a - // performance hack so that we do not have to read the underlying inode struct - // to know the type of inode. This will only work when the SbDirentFileType - // feature is set. If not, the second returned value will be false indicating - // that user code has to use the inode mode to extract the file type. - FileType() (fs.InodeType, bool) -} diff --git a/pkg/sentry/fs/ext/disklayout/dirent_new.go b/pkg/sentry/fs/ext/disklayout/dirent_new.go deleted file mode 100644 index 29ae4a5c2..000000000 --- a/pkg/sentry/fs/ext/disklayout/dirent_new.go +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// DirentNew represents the ext4 directory entry struct. This emulates Linux's -// ext4_dir_entry_2 struct. The FileName can not be more than 255 bytes so we -// only need 8 bits to store the NameLength. As a result, NameLength has been -// shortened and the other 8 bits are used to encode the file type. Use the -// FileTypeRaw field only if the SbDirentFileType feature is set. -// -// Note: This struct can be of variable size on disk. The one described below -// is of maximum size and the FileName beyond NameLength bytes might contain -// garbage. -type DirentNew struct { - InodeNumber uint32 - RecordLength uint16 - NameLength uint8 - FileTypeRaw uint8 - FileNameRaw [MaxFileName]byte -} - -// Compiles only if DirentNew implements Dirent. -var _ Dirent = (*DirentNew)(nil) - -// Inode implements Dirent.Inode. -func (d *DirentNew) Inode() uint32 { return d.InodeNumber } - -// RecordSize implements Dirent.RecordSize. -func (d *DirentNew) RecordSize() uint16 { return d.RecordLength } - -// FileName implements Dirent.FileName. -func (d *DirentNew) FileName() string { - return string(d.FileNameRaw[:d.NameLength]) -} - -// FileType implements Dirent.FileType. -func (d *DirentNew) FileType() (fs.InodeType, bool) { - if inodeType, ok := inodeTypeByFileType[d.FileTypeRaw]; ok { - return inodeType, true - } - - panic(fmt.Sprintf("unknown file type %v", d.FileTypeRaw)) -} diff --git a/pkg/sentry/fs/ext/disklayout/dirent_old.go b/pkg/sentry/fs/ext/disklayout/dirent_old.go deleted file mode 100644 index 6fff12a6e..000000000 --- a/pkg/sentry/fs/ext/disklayout/dirent_old.go +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import "gvisor.dev/gvisor/pkg/sentry/fs" - -// DirentOld represents the old directory entry struct which does not contain -// the file type. This emulates Linux's ext4_dir_entry struct. -// -// Note: This struct can be of variable size on disk. The one described below -// is of maximum size and the FileName beyond NameLength bytes might contain -// garbage. -type DirentOld struct { - InodeNumber uint32 - RecordLength uint16 - NameLength uint16 - FileNameRaw [MaxFileName]byte -} - -// Compiles only if DirentOld implements Dirent. -var _ Dirent = (*DirentOld)(nil) - -// Inode implements Dirent.Inode. -func (d *DirentOld) Inode() uint32 { return d.InodeNumber } - -// RecordSize implements Dirent.RecordSize. -func (d *DirentOld) RecordSize() uint16 { return d.RecordLength } - -// FileName implements Dirent.FileName. -func (d *DirentOld) FileName() string { - return string(d.FileNameRaw[:d.NameLength]) -} - -// FileType implements Dirent.FileType. -func (d *DirentOld) FileType() (fs.InodeType, bool) { - return fs.Anonymous, false -} diff --git a/pkg/sentry/fs/ext/disklayout/dirent_test.go b/pkg/sentry/fs/ext/disklayout/dirent_test.go deleted file mode 100644 index 934919f8a..000000000 --- a/pkg/sentry/fs/ext/disklayout/dirent_test.go +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "testing" -) - -// TestDirentSize tests that the dirent structs are of the correct -// size. -func TestDirentSize(t *testing.T) { - assertSize(t, DirentOld{}, uintptr(DirentSize)) - assertSize(t, DirentNew{}, uintptr(DirentSize)) -} diff --git a/pkg/sentry/fs/ext/disklayout/disklayout.go b/pkg/sentry/fs/ext/disklayout/disklayout.go deleted file mode 100644 index bdf4e2132..000000000 --- a/pkg/sentry/fs/ext/disklayout/disklayout.go +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package disklayout provides Linux ext file system's disk level structures -// which can be directly read into from the underlying device. Structs aim to -// emulate structures `exactly` how they are layed out on disk. -// -// This library aims to be compatible with all ext(2/3/4) systems so it -// provides a generic interface for all major structures and various -// implementations (for different versions). The user code is responsible for -// using appropriate implementations based on the underlying device. -// -// Interfacing all major structures here serves a few purposes: -// - Abstracts away the complexity of the underlying structure from client -// code. The client only has to figure out versioning on set up and then -// can use these as black boxes and pass it higher up the stack. -// - Having pointer receivers forces the user to use pointers to these -// heavy structs. Hence, prevents the client code from unintentionally -// copying these by value while passing the interface around. -// - Version-based implementation selection is resolved on set up hence -// avoiding per call overhead of choosing implementation. -// - All interface methods are pretty light weight (do not take in any -// parameters by design). Passing pointer arguments to interface methods -// can lead to heap allocation as the compiler won't be able to perform -// escape analysis on an unknown implementation at compile time. -// -// Notes: -// - All fields in these structs are exported because binary.Read would -// panic otherwise. -// - All structures on disk are in little-endian order. Only jbd2 (journal) -// structures are in big-endian order. -// - All OS dependent fields in these structures will be interpretted using -// the Linux version of that field. -// - The suffix `Lo` in field names stands for lower bits of that field. -// - The suffix `Hi` in field names stands for upper bits of that field. -// - The suffix `Raw` has been added to indicate that the field is not split -// into Lo and Hi fields and also to resolve name collision with the -// respective interface. -package disklayout diff --git a/pkg/sentry/fs/ext/disklayout/extent.go b/pkg/sentry/fs/ext/disklayout/extent.go deleted file mode 100644 index 567523d32..000000000 --- a/pkg/sentry/fs/ext/disklayout/extent.go +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// Extents were introduced in ext4 and provide huge performance gains in terms -// data locality and reduced metadata block usage. Extents are organized in -// extent trees. The root node is contained in inode.BlocksRaw. -// -// Terminology: -// - Physical Block: -// Filesystem data block which is addressed normally wrt the entire -// filesystem (addressed with 48 bits). -// -// - File Block: -// Data block containing *only* file data and addressed wrt to the file -// with only 32 bits. The (i)th file block contains file data from -// byte (i * sb.BlockSize()) to ((i+1) * sb.BlockSize()). - -const ( - // ExtentStructsSize is the size of all the three extent on-disk structs. - ExtentStructsSize = 12 - - // ExtentMagic is the magic number which must be present in the header. - ExtentMagic = 0xf30a -) - -// ExtentEntryPair couples an in-memory ExtendNode with the ExtentEntry that -// points to it. We want to cache these structs in memory to avoid repeated -// disk reads. -// -// Note: This struct itself does not represent an on-disk struct. -type ExtentEntryPair struct { - // Entry points to the child node on disk. - Entry ExtentEntry - // Node points to child node in memory. Is nil if the current node is a leaf. - Node *ExtentNode -} - -// ExtentNode represents an extent tree node. For internal nodes, all Entries -// will be ExtendIdxs. For leaf nodes, they will all be Extents. -// -// Note: This struct itself does not represent an on-disk struct. -type ExtentNode struct { - Header ExtentHeader - Entries []ExtentEntryPair -} - -// ExtentEntry reprsents an extent tree node entry. The entry can either be -// an ExtentIdx or Extent itself. This exists to simplify navigation logic. -type ExtentEntry interface { - // FileBlock returns the first file block number covered by this entry. - FileBlock() uint32 - - // PhysicalBlock returns the child physical block that this entry points to. - PhysicalBlock() uint64 -} - -// ExtentHeader emulates the ext4_extent_header struct in ext4. Each extent -// tree node begins with this and is followed by `NumEntries` number of: -// - Extent if `Depth` == 0 -// - ExtentIdx otherwise -type ExtentHeader struct { - // Magic in the extent magic number, must be 0xf30a. - Magic uint16 - - // NumEntries indicates the number of valid entries following the header. - NumEntries uint16 - - // MaxEntries that could follow the header. Used while adding entries. - MaxEntries uint16 - - // Height represents the distance of this node from the farthest leaf. Please - // note that Linux incorrectly calls this `Depth` (which means the distance - // of the node from the root). - Height uint16 - _ uint32 -} - -// ExtentIdx emulates the ext4_extent_idx struct in ext4. Only present in -// internal nodes. Sorted in ascending order based on FirstFileBlock since -// Linux does a binary search on this. This points to a block containing the -// child node. -type ExtentIdx struct { - FirstFileBlock uint32 - ChildBlockLo uint32 - ChildBlockHi uint16 - _ uint16 -} - -// Compiles only if ExtentIdx implements ExtentEntry. -var _ ExtentEntry = (*ExtentIdx)(nil) - -// FileBlock implements ExtentEntry.FileBlock. -func (ei *ExtentIdx) FileBlock() uint32 { - return ei.FirstFileBlock -} - -// PhysicalBlock implements ExtentEntry.PhysicalBlock. It returns the -// physical block number of the child block. -func (ei *ExtentIdx) PhysicalBlock() uint64 { - return (uint64(ei.ChildBlockHi) << 32) | uint64(ei.ChildBlockLo) -} - -// Extent represents the ext4_extent struct in ext4. Only present in leaf -// nodes. Sorted in ascending order based on FirstFileBlock since Linux does a -// binary search on this. This points to an array of data blocks containing the -// file data. It covers `Length` data blocks starting from `StartBlock`. -type Extent struct { - FirstFileBlock uint32 - Length uint16 - StartBlockHi uint16 - StartBlockLo uint32 -} - -// Compiles only if Extent implements ExtentEntry. -var _ ExtentEntry = (*Extent)(nil) - -// FileBlock implements ExtentEntry.FileBlock. -func (e *Extent) FileBlock() uint32 { - return e.FirstFileBlock -} - -// PhysicalBlock implements ExtentEntry.PhysicalBlock. It returns the -// physical block number of the first data block this extent covers. -func (e *Extent) PhysicalBlock() uint64 { - return (uint64(e.StartBlockHi) << 32) | uint64(e.StartBlockLo) -} diff --git a/pkg/sentry/fs/ext/disklayout/extent_test.go b/pkg/sentry/fs/ext/disklayout/extent_test.go deleted file mode 100644 index b0fad9b71..000000000 --- a/pkg/sentry/fs/ext/disklayout/extent_test.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "testing" -) - -// TestExtentSize tests that the extent structs are of the correct -// size. -func TestExtentSize(t *testing.T) { - assertSize(t, ExtentHeader{}, ExtentStructsSize) - assertSize(t, ExtentIdx{}, ExtentStructsSize) - assertSize(t, Extent{}, ExtentStructsSize) -} diff --git a/pkg/sentry/fs/ext/disklayout/inode.go b/pkg/sentry/fs/ext/disklayout/inode.go deleted file mode 100644 index 88ae913f5..000000000 --- a/pkg/sentry/fs/ext/disklayout/inode.go +++ /dev/null @@ -1,274 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/time" -) - -// Special inodes. See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#special-inodes. -const ( - // RootDirInode is the inode number of the root directory inode. - RootDirInode = 2 -) - -// The Inode interface must be implemented by structs representing ext inodes. -// The inode stores all the metadata pertaining to the file (except for the -// file name which is held by the directory entry). It does NOT expose all -// fields and should be extended if need be. -// -// Some file systems (e.g. FAT) use the directory entry to store all this -// information. Ext file systems do not so that they can support hard links. -// However, ext4 cheats a little bit and duplicates the file type in the -// directory entry for performance gains. -// -// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#index-nodes. -type Inode interface { - // Mode returns the linux file mode which is majorly used to extract - // information like: - // - File permissions (read/write/execute by user/group/others). - // - Sticky, set UID and GID bits. - // - File type. - // - // Masks to extract this information are provided in pkg/abi/linux/file.go. - Mode() linux.FileMode - - // UID returns the owner UID. - UID() auth.KUID - - // GID returns the owner GID. - GID() auth.KGID - - // Size returns the size of the file in bytes. - Size() uint64 - - // InodeSize returns the size of this inode struct in bytes. - // In ext2 and ext3, the inode struct and inode disk record size was fixed at - // 128 bytes. Ext4 makes it possible for the inode struct to be bigger. - // However, accessing any field beyond the 128 bytes marker must be verified - // using this method. - InodeSize() uint16 - - // AccessTime returns the last access time. Shows when the file was last read. - // - // If InExtendedAttr is set, then this should NOT be used because the - // underlying field is used to store the extended attribute value checksum. - AccessTime() time.Time - - // ChangeTime returns the last change time. Shows when the file meta data - // (like permissions) was last changed. - // - // If InExtendedAttr is set, then this should NOT be used because the - // underlying field is used to store the lower 32 bits of the attribute - // value’s reference count. - ChangeTime() time.Time - - // ModificationTime returns the last modification time. Shows when the file - // content was last modified. - // - // If InExtendedAttr is set, then this should NOT be used because - // the underlying field contains the number of the inode that owns the - // extended attribute. - ModificationTime() time.Time - - // DeletionTime returns the deletion time. Inodes are marked as deleted by - // writing to the underlying field. FS tools can restore files until they are - // actually overwritten. - DeletionTime() time.Time - - // LinksCount returns the number of hard links to this inode. - // - // Normally there is an upper limit on the number of hard links: - // - ext2/ext3 = 32,000 - // - ext4 = 65,000 - // - // This implies that an ext4 directory cannot have more than 64,998 - // subdirectories because each subdirectory will have a hard link to the - // directory via the `..` entry. The directory has hard link via the `.` entry - // of its own. And finally the inode is initiated with 1 hard link (itself). - // - // The underlying value is reset to 1 if all the following hold: - // - Inode is a directory. - // - SbDirNlink is enabled. - // - Number of hard links is incremented past 64,999. - // Hard link value of 1 for a directory would indicate that the number of hard - // links is unknown because a directory can have minimum 2 hard links (itself - // and `.` entry). - LinksCount() uint16 - - // Flags returns InodeFlags which represents the inode flags. - Flags() InodeFlags - - // Data returns the underlying inode.i_block array as a slice so it's - // modifiable. This field is special and is used to store various kinds of - // things depending on the filesystem version and inode type. The underlying - // field name in Linux is a little misleading. - // - In ext2/ext3, it contains the block map. - // - In ext4, it contains the extent tree root node. - // - For inline files, it contains the file contents. - // - For symlinks, it contains the link path (if it fits here). - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#the-contents-of-inode-i-block. - Data() []byte -} - -// Inode flags. This is not comprehensive and flags which were not used in -// the Linux kernel have been excluded. -const ( - // InSync indicates that all writes to the file must be synchronous. - InSync = 0x8 - - // InImmutable indicates that this file is immutable. - InImmutable = 0x10 - - // InAppend indicates that this file can only be appended to. - InAppend = 0x20 - - // InNoDump indicates that teh dump(1) utility should not dump this file. - InNoDump = 0x40 - - // InNoAccessTime indicates that the access time of this inode must not be - // updated. - InNoAccessTime = 0x80 - - // InIndex indicates that this directory has hashed indexes. - InIndex = 0x1000 - - // InJournalData indicates that file data must always be written through a - // journal device. - InJournalData = 0x4000 - - // InDirSync indicates that all the directory entiry data must be written - // synchronously. - InDirSync = 0x10000 - - // InTopDir indicates that this inode is at the top of the directory hierarchy. - InTopDir = 0x20000 - - // InHugeFile indicates that this is a huge file. - InHugeFile = 0x40000 - - // InExtents indicates that this inode uses extents. - InExtents = 0x80000 - - // InExtendedAttr indicates that this inode stores a large extended attribute - // value in its data blocks. - InExtendedAttr = 0x200000 - - // InInline indicates that this inode has inline data. - InInline = 0x10000000 - - // InReserved indicates that this inode is reserved for the ext4 library. - InReserved = 0x80000000 -) - -// InodeFlags represents all possible combinations of inode flags. It aims to -// cover the bit masks and provide a more user-friendly interface. -type InodeFlags struct { - Sync bool - Immutable bool - Append bool - NoDump bool - NoAccessTime bool - Index bool - JournalData bool - DirSync bool - TopDir bool - HugeFile bool - Extents bool - ExtendedAttr bool - Inline bool - Reserved bool -} - -// ToInt converts inode flags back to its 32-bit rep. -func (f InodeFlags) ToInt() uint32 { - var res uint32 - - if f.Sync { - res |= InSync - } - if f.Immutable { - res |= InImmutable - } - if f.Append { - res |= InAppend - } - if f.NoDump { - res |= InNoDump - } - if f.NoAccessTime { - res |= InNoAccessTime - } - if f.Index { - res |= InIndex - } - if f.JournalData { - res |= InJournalData - } - if f.DirSync { - res |= InDirSync - } - if f.TopDir { - res |= InTopDir - } - if f.HugeFile { - res |= InHugeFile - } - if f.Extents { - res |= InExtents - } - if f.ExtendedAttr { - res |= InExtendedAttr - } - if f.Inline { - res |= InInline - } - if f.Reserved { - res |= InReserved - } - - return res -} - -// InodeFlagsFromInt converts the integer representation of inode flags to -// a InodeFlags struct. -func InodeFlagsFromInt(f uint32) InodeFlags { - return InodeFlags{ - Sync: f&InSync > 0, - Immutable: f&InImmutable > 0, - Append: f&InAppend > 0, - NoDump: f&InNoDump > 0, - NoAccessTime: f&InNoAccessTime > 0, - Index: f&InIndex > 0, - JournalData: f&InJournalData > 0, - DirSync: f&InDirSync > 0, - TopDir: f&InTopDir > 0, - HugeFile: f&InHugeFile > 0, - Extents: f&InExtents > 0, - ExtendedAttr: f&InExtendedAttr > 0, - Inline: f&InInline > 0, - Reserved: f&InReserved > 0, - } -} - -// These masks define how users can view/modify inode flags. The rest of the -// flags are for internal kernel usage only. -const ( - InUserReadFlagMask = 0x4BDFFF - InUserWriteFlagMask = 0x4B80FF -) diff --git a/pkg/sentry/fs/ext/disklayout/inode_new.go b/pkg/sentry/fs/ext/disklayout/inode_new.go deleted file mode 100644 index 8f9f574ce..000000000 --- a/pkg/sentry/fs/ext/disklayout/inode_new.go +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import "gvisor.dev/gvisor/pkg/sentry/kernel/time" - -// InodeNew represents ext4 inode structure which can be bigger than -// OldInodeSize. The actual size of this struct should be determined using -// inode.ExtraInodeSize. Accessing any field here should be verified with the -// actual size. The extra space between the end of the inode struct and end of -// the inode record can be used to store extended attr. -// -// If the TimeExtra fields are in scope, the lower 2 bits of those are used -// to extend their counter part to be 34 bits wide; the rest (upper) 30 bits -// are used to provide nanoscond precision. Hence, these timestamps will now -// overflow in May 2446. -// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps. -type InodeNew struct { - InodeOld - - ExtraInodeSize uint16 - ChecksumHi uint16 - ChangeTimeExtra uint32 - ModificationTimeExtra uint32 - AccessTimeExtra uint32 - CreationTime uint32 - CreationTimeExtra uint32 - VersionHi uint32 - ProjectID uint32 -} - -// Compiles only if InodeNew implements Inode. -var _ Inode = (*InodeNew)(nil) - -// fromExtraTime decodes the extra time and constructs the kernel time struct -// with nanosecond precision. -func fromExtraTime(lo int32, extra uint32) time.Time { - // See description above InodeNew for format. - seconds := (int64(extra&0x3) << 32) + int64(lo) - nanoseconds := int64(extra >> 2) - return time.FromUnix(seconds, nanoseconds) -} - -// Only override methods which change due to ext4 specific fields. - -// Size implements Inode.Size. -func (in *InodeNew) Size() uint64 { - return (uint64(in.SizeHi) << 32) | uint64(in.SizeLo) -} - -// InodeSize implements Inode.InodeSize. -func (in *InodeNew) InodeSize() uint16 { - return OldInodeSize + in.ExtraInodeSize -} - -// ChangeTime implements Inode.ChangeTime. -func (in *InodeNew) ChangeTime() time.Time { - // Apply new timestamp logic if inode.ChangeTimeExtra is in scope. - if in.ExtraInodeSize >= 8 { - return fromExtraTime(in.ChangeTimeRaw, in.ChangeTimeExtra) - } - - return in.InodeOld.ChangeTime() -} - -// ModificationTime implements Inode.ModificationTime. -func (in *InodeNew) ModificationTime() time.Time { - // Apply new timestamp logic if inode.ModificationTimeExtra is in scope. - if in.ExtraInodeSize >= 12 { - return fromExtraTime(in.ModificationTimeRaw, in.ModificationTimeExtra) - } - - return in.InodeOld.ModificationTime() -} - -// AccessTime implements Inode.AccessTime. -func (in *InodeNew) AccessTime() time.Time { - // Apply new timestamp logic if inode.AccessTimeExtra is in scope. - if in.ExtraInodeSize >= 16 { - return fromExtraTime(in.AccessTimeRaw, in.AccessTimeExtra) - } - - return in.InodeOld.AccessTime() -} diff --git a/pkg/sentry/fs/ext/disklayout/inode_old.go b/pkg/sentry/fs/ext/disklayout/inode_old.go deleted file mode 100644 index db25b11b6..000000000 --- a/pkg/sentry/fs/ext/disklayout/inode_old.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/time" -) - -const ( - // OldInodeSize is the inode size in ext2/ext3. - OldInodeSize = 128 -) - -// InodeOld implements Inode interface. It emulates ext2/ext3 inode struct. -// Inode struct size and record size are both 128 bytes for this. -// -// All fields representing time are in seconds since the epoch. Which means that -// they will overflow in January 2038. -type InodeOld struct { - ModeRaw uint16 - UIDLo uint16 - SizeLo uint32 - - // The time fields are signed integers because they could be negative to - // represent time before the epoch. - AccessTimeRaw int32 - ChangeTimeRaw int32 - ModificationTimeRaw int32 - DeletionTimeRaw int32 - - GIDLo uint16 - LinksCountRaw uint16 - BlocksCountLo uint32 - FlagsRaw uint32 - VersionLo uint32 // This is OS dependent. - DataRaw [60]byte - Generation uint32 - FileACLLo uint32 - SizeHi uint32 - ObsoFaddr uint32 - - // OS dependent fields have been inlined here. - BlocksCountHi uint16 - FileACLHi uint16 - UIDHi uint16 - GIDHi uint16 - ChecksumLo uint16 - _ uint16 -} - -// Compiles only if InodeOld implements Inode. -var _ Inode = (*InodeOld)(nil) - -// Mode implements Inode.Mode. -func (in *InodeOld) Mode() linux.FileMode { return linux.FileMode(in.ModeRaw) } - -// UID implements Inode.UID. -func (in *InodeOld) UID() auth.KUID { - return auth.KUID((uint32(in.UIDHi) << 16) | uint32(in.UIDLo)) -} - -// GID implements Inode.GID. -func (in *InodeOld) GID() auth.KGID { - return auth.KGID((uint32(in.GIDHi) << 16) | uint32(in.GIDLo)) -} - -// Size implements Inode.Size. -func (in *InodeOld) Size() uint64 { - // In ext2/ext3, in.SizeHi did not exist, it was instead named in.DirACL. - return uint64(in.SizeLo) -} - -// InodeSize implements Inode.InodeSize. -func (in *InodeOld) InodeSize() uint16 { return OldInodeSize } - -// AccessTime implements Inode.AccessTime. -func (in *InodeOld) AccessTime() time.Time { - return time.FromUnix(int64(in.AccessTimeRaw), 0) -} - -// ChangeTime implements Inode.ChangeTime. -func (in *InodeOld) ChangeTime() time.Time { - return time.FromUnix(int64(in.ChangeTimeRaw), 0) -} - -// ModificationTime implements Inode.ModificationTime. -func (in *InodeOld) ModificationTime() time.Time { - return time.FromUnix(int64(in.ModificationTimeRaw), 0) -} - -// DeletionTime implements Inode.DeletionTime. -func (in *InodeOld) DeletionTime() time.Time { - return time.FromUnix(int64(in.DeletionTimeRaw), 0) -} - -// LinksCount implements Inode.LinksCount. -func (in *InodeOld) LinksCount() uint16 { return in.LinksCountRaw } - -// Flags implements Inode.Flags. -func (in *InodeOld) Flags() InodeFlags { return InodeFlagsFromInt(in.FlagsRaw) } - -// Data implements Inode.Data. -func (in *InodeOld) Data() []byte { return in.DataRaw[:] } diff --git a/pkg/sentry/fs/ext/disklayout/inode_test.go b/pkg/sentry/fs/ext/disklayout/inode_test.go deleted file mode 100644 index dd03ee50e..000000000 --- a/pkg/sentry/fs/ext/disklayout/inode_test.go +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "fmt" - "strconv" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/kernel/time" -) - -// TestInodeSize tests that the inode structs are of the correct size. -func TestInodeSize(t *testing.T) { - assertSize(t, InodeOld{}, OldInodeSize) - - // This was updated from 156 bytes to 160 bytes in Oct 2015. - assertSize(t, InodeNew{}, 160) -} - -// TestTimestampSeconds tests that the seconds part of [a/c/m] timestamps in -// ext4 inode structs are decoded correctly. -// -// These tests are derived from the table under https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps. -func TestTimestampSeconds(t *testing.T) { - type timestampTest struct { - // msbSet tells if the most significant bit of InodeOld.[X]TimeRaw is set. - // If this is set then the 32-bit time is negative. - msbSet bool - - // lowerBound tells if we should take the lowest possible value of - // InodeOld.[X]TimeRaw while satisfying test.msbSet condition. If set to - // false it tells to take the highest possible value. - lowerBound bool - - // extraBits is InodeNew.[X]TimeExtra. - extraBits uint32 - - // want is the kernel time struct that is expected. - want time.Time - } - - tests := []timestampTest{ - // 1901-12-13 - { - msbSet: true, - lowerBound: true, - extraBits: 0, - want: time.FromUnix(int64(-0x80000000), 0), - }, - - // 1969-12-31 - { - msbSet: true, - lowerBound: false, - extraBits: 0, - want: time.FromUnix(int64(-1), 0), - }, - - // 1970-01-01 - { - msbSet: false, - lowerBound: true, - extraBits: 0, - want: time.FromUnix(int64(0), 0), - }, - - // 2038-01-19 - { - msbSet: false, - lowerBound: false, - extraBits: 0, - want: time.FromUnix(int64(0x7fffffff), 0), - }, - - // 2038-01-19 - { - msbSet: true, - lowerBound: true, - extraBits: 1, - want: time.FromUnix(int64(0x80000000), 0), - }, - - // 2106-02-07 - { - msbSet: true, - lowerBound: false, - extraBits: 1, - want: time.FromUnix(int64(0xffffffff), 0), - }, - - // 2106-02-07 - { - msbSet: false, - lowerBound: true, - extraBits: 1, - want: time.FromUnix(int64(0x100000000), 0), - }, - - // 2174-02-25 - { - msbSet: false, - lowerBound: false, - extraBits: 1, - want: time.FromUnix(int64(0x17fffffff), 0), - }, - - // 2174-02-25 - { - msbSet: true, - lowerBound: true, - extraBits: 2, - want: time.FromUnix(int64(0x180000000), 0), - }, - - // 2242-03-16 - { - msbSet: true, - lowerBound: false, - extraBits: 2, - want: time.FromUnix(int64(0x1ffffffff), 0), - }, - - // 2242-03-16 - { - msbSet: false, - lowerBound: true, - extraBits: 2, - want: time.FromUnix(int64(0x200000000), 0), - }, - - // 2310-04-04 - { - msbSet: false, - lowerBound: false, - extraBits: 2, - want: time.FromUnix(int64(0x27fffffff), 0), - }, - - // 2310-04-04 - { - msbSet: true, - lowerBound: true, - extraBits: 3, - want: time.FromUnix(int64(0x280000000), 0), - }, - - // 2378-04-22 - { - msbSet: true, - lowerBound: false, - extraBits: 3, - want: time.FromUnix(int64(0x2ffffffff), 0), - }, - - // 2378-04-22 - { - msbSet: false, - lowerBound: true, - extraBits: 3, - want: time.FromUnix(int64(0x300000000), 0), - }, - - // 2446-05-10 - { - msbSet: false, - lowerBound: false, - extraBits: 3, - want: time.FromUnix(int64(0x37fffffff), 0), - }, - } - - lowerMSB0 := int32(0) // binary: 00000000 00000000 00000000 00000000 - upperMSB0 := int32(0x7fffffff) // binary: 01111111 11111111 11111111 11111111 - lowerMSB1 := int32(-0x80000000) // binary: 10000000 00000000 00000000 00000000 - upperMSB1 := int32(-1) // binary: 11111111 11111111 11111111 11111111 - - get32BitTime := func(test timestampTest) int32 { - if test.msbSet { - if test.lowerBound { - return lowerMSB1 - } - - return upperMSB1 - } - - if test.lowerBound { - return lowerMSB0 - } - - return upperMSB0 - } - - getTestName := func(test timestampTest) string { - return fmt.Sprintf( - "Tests time decoding with epoch bits 0b%s and 32-bit raw time: MSB set=%t, lower bound=%t", - strconv.FormatInt(int64(test.extraBits), 2), - test.msbSet, - test.lowerBound, - ) - } - - for _, test := range tests { - t.Run(getTestName(test), func(t *testing.T) { - if got := fromExtraTime(get32BitTime(test), test.extraBits); got != test.want { - t.Errorf("Expected: %v, Got: %v", test.want, got) - } - }) - } -} diff --git a/pkg/sentry/fs/ext/disklayout/superblock.go b/pkg/sentry/fs/ext/disklayout/superblock.go deleted file mode 100644 index 8bb327006..000000000 --- a/pkg/sentry/fs/ext/disklayout/superblock.go +++ /dev/null @@ -1,471 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -const ( - // SbOffset is the absolute offset at which the superblock is placed. - SbOffset = 1024 -) - -// SuperBlock should be implemented by structs representing the ext superblock. -// The superblock holds a lot of information about the enclosing filesystem. -// This interface aims to provide access methods to important information held -// by the superblock. It does NOT expose all fields of the superblock, only the -// ones necessary. This can be expanded when need be. -// -// Location and replication: -// - The superblock is located at offset 1024 in block group 0. -// - Redundant copies of the superblock and group descriptors are kept in -// all groups if SbSparse feature flag is NOT set. If it is set, the -// replicas only exist in groups whose group number is either 0 or a -// power of 3, 5, or 7. -// - There is also a sparse superblock feature v2 in which there are just -// two replicas saved in the block groups pointed by sb.s_backup_bgs. -// -// Replicas should eventually be updated if the superblock is updated. -// -// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#super-block. -type SuperBlock interface { - // InodesCount returns the total number of inodes in this filesystem. - InodesCount() uint32 - - // BlocksCount returns the total number of data blocks in this filesystem. - BlocksCount() uint64 - - // FreeBlocksCount returns the number of free blocks in this filesystem. - FreeBlocksCount() uint64 - - // FreeInodesCount returns the number of free inodes in this filesystem. - FreeInodesCount() uint32 - - // MountCount returns the number of mounts since the last fsck. - MountCount() uint16 - - // MaxMountCount returns the number of mounts allowed beyond which a fsck is - // needed. - MaxMountCount() uint16 - - // FirstDataBlock returns the absolute block number of the first data block, - // which contains the super block itself. - // - // If the filesystem has 1kb data blocks then this should return 1. For all - // other configurations, this typically returns 0. - FirstDataBlock() uint32 - - // BlockSize returns the size of one data block in this filesystem. - // This can be calculated by 2^(10 + sb.s_log_block_size). This ensures that - // the smallest block size is 1kb. - BlockSize() uint64 - - // BlocksPerGroup returns the number of data blocks in a block group. - BlocksPerGroup() uint32 - - // ClusterSize returns block cluster size (set during mkfs time by admin). - // This can be calculated by 2^(10 + sb.s_log_cluster_size). This ensures that - // the smallest cluster size is 1kb. - // - // sb.s_log_cluster_size must equal sb.s_log_block_size if bigalloc feature - // is NOT set and consequently BlockSize() = ClusterSize() in that case. - ClusterSize() uint64 - - // ClustersPerGroup returns: - // - number of clusters per group if bigalloc is enabled. - // - BlocksPerGroup() otherwise. - ClustersPerGroup() uint32 - - // InodeSize returns the size of the inode disk record size in bytes. Use this - // to iterate over inode arrays on disk. - // - // In ext2 and ext3: - // - Each inode had a disk record of 128 bytes. - // - The inode struct size was fixed at 128 bytes. - // - // In ext4 its possible to allocate larger on-disk inodes: - // - Inode disk record size = sb.s_inode_size (function return value). - // = 256 (default) - // - Inode struct size = 128 + inode.i_extra_isize. - // = 128 + 32 = 160 (default) - InodeSize() uint16 - - // InodesPerGroup returns the number of inodes in a block group. - InodesPerGroup() uint32 - - // BgDescSize returns the size of the block group descriptor struct. - // - // In ext2, ext3, ext4 (without 64-bit feature), the block group descriptor - // is only 32 bytes long. - // In ext4 with 64-bit feature, the block group descriptor expands to AT LEAST - // 64 bytes. It might be bigger than that. - BgDescSize() uint16 - - // CompatibleFeatures returns the CompatFeatures struct which holds all the - // compatible features this fs supports. - CompatibleFeatures() CompatFeatures - - // IncompatibleFeatures returns the CompatFeatures struct which holds all the - // incompatible features this fs supports. - IncompatibleFeatures() IncompatFeatures - - // ReadOnlyCompatibleFeatures returns the CompatFeatures struct which holds all the - // readonly compatible features this fs supports. - ReadOnlyCompatibleFeatures() RoCompatFeatures - - // Magic() returns the magic signature which must be 0xef53. - Magic() uint16 - - // Revision returns the superblock revision. Superblock struct fields from - // offset 0x54 till 0x150 should only be used if superblock has DynamicRev. - Revision() SbRevision -} - -// SbRevision is the type for superblock revisions. -type SbRevision uint32 - -// Super block revisions. -const ( - // OldRev is the good old (original) format. - OldRev SbRevision = 0 - - // DynamicRev is v2 format w/ dynamic inode sizes. - DynamicRev SbRevision = 1 -) - -// Superblock compatible features. -// This is not exhaustive, unused features are not listed. -const ( - // SbDirPrealloc indicates directory preallocation. - SbDirPrealloc = 0x1 - - // SbHasJournal indicates the presence of a journal. jbd2 should only work - // with this being set. - SbHasJournal = 0x4 - - // SbExtAttr indicates extended attributes support. - SbExtAttr = 0x8 - - // SbResizeInode indicates that the fs has reserved GDT blocks (right after - // group descriptors) for fs expansion. - SbResizeInode = 0x10 - - // SbDirIndex indicates that the fs has directory indices. - SbDirIndex = 0x20 - - // SbSparseV2 stands for Sparse superblock version 2. - SbSparseV2 = 0x200 -) - -// CompatFeatures represents a superblock's compatible feature set. If the -// kernel does not understand any of these feature, it can still read/write -// to this fs. -type CompatFeatures struct { - DirPrealloc bool - HasJournal bool - ExtAttr bool - ResizeInode bool - DirIndex bool - SparseV2 bool -} - -// ToInt converts superblock compatible features back to its 32-bit rep. -func (f CompatFeatures) ToInt() uint32 { - var res uint32 - - if f.DirPrealloc { - res |= SbDirPrealloc - } - if f.HasJournal { - res |= SbHasJournal - } - if f.ExtAttr { - res |= SbExtAttr - } - if f.ResizeInode { - res |= SbResizeInode - } - if f.DirIndex { - res |= SbDirIndex - } - if f.SparseV2 { - res |= SbSparseV2 - } - - return res -} - -// CompatFeaturesFromInt converts the integer representation of superblock -// compatible features to CompatFeatures struct. -func CompatFeaturesFromInt(f uint32) CompatFeatures { - return CompatFeatures{ - DirPrealloc: f&SbDirPrealloc > 0, - HasJournal: f&SbHasJournal > 0, - ExtAttr: f&SbExtAttr > 0, - ResizeInode: f&SbResizeInode > 0, - DirIndex: f&SbDirIndex > 0, - SparseV2: f&SbSparseV2 > 0, - } -} - -// Superblock incompatible features. -// This is not exhaustive, unused features are not listed. -const ( - // SbDirentFileType indicates that directory entries record the file type. - // We should use struct DirentNew for dirents then. - SbDirentFileType = 0x2 - - // SbRecovery indicates that the filesystem needs recovery. - SbRecovery = 0x4 - - // SbJournalDev indicates that the filesystem has a separate journal device. - SbJournalDev = 0x8 - - // SbMetaBG indicates that the filesystem is using Meta block groups. Moves - // the group descriptors from the congested first block group into the first - // group of each metablock group to increase the maximum block groups limit - // and hence support much larger filesystems. - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#meta-block-groups. - SbMetaBG = 0x10 - - // SbExtents indicates that the filesystem uses extents. Must be set in ext4 - // filesystems. - SbExtents = 0x40 - - // SbIs64Bit indicates that this filesystem addresses blocks with 64-bits. - // Hence can support 2^64 data blocks. - SbIs64Bit = 0x80 - - // SbMMP indicates that this filesystem has multiple mount protection. - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#multiple-mount-protection. - SbMMP = 0x100 - - // SbFlexBg indicates that this filesystem has flexible block groups. Several - // block groups are tied into one logical block group so that all the metadata - // for the block groups (bitmaps and inode tables) are close together for - // faster loading. Consequently, large files will be continuous on disk. - // However, this does not affect the placement of redundant superblocks and - // group descriptors. - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#flexible-block-groups. - SbFlexBg = 0x200 - - // SbLargeDir shows that large directory enabled. Directory htree can be 3 - // levels deep. Directory htrees are allowed to be 2 levels deep otherwise. - SbLargeDir = 0x4000 - - // SbInlineData allows inline data in inodes for really small files. - SbInlineData = 0x8000 - - // SbEncrypted indicates that this fs contains encrypted inodes. - SbEncrypted = 0x10000 -) - -// IncompatFeatures represents a superblock's incompatible feature set. If the -// kernel does not understand any of these feature, it should refuse to mount. -type IncompatFeatures struct { - DirentFileType bool - Recovery bool - JournalDev bool - MetaBG bool - Extents bool - Is64Bit bool - MMP bool - FlexBg bool - LargeDir bool - InlineData bool - Encrypted bool -} - -// ToInt converts superblock incompatible features back to its 32-bit rep. -func (f IncompatFeatures) ToInt() uint32 { - var res uint32 - - if f.DirentFileType { - res |= SbDirentFileType - } - if f.Recovery { - res |= SbRecovery - } - if f.JournalDev { - res |= SbJournalDev - } - if f.MetaBG { - res |= SbMetaBG - } - if f.Extents { - res |= SbExtents - } - if f.Is64Bit { - res |= SbIs64Bit - } - if f.MMP { - res |= SbMMP - } - if f.FlexBg { - res |= SbFlexBg - } - if f.LargeDir { - res |= SbLargeDir - } - if f.InlineData { - res |= SbInlineData - } - if f.Encrypted { - res |= SbEncrypted - } - - return res -} - -// IncompatFeaturesFromInt converts the integer representation of superblock -// incompatible features to IncompatFeatures struct. -func IncompatFeaturesFromInt(f uint32) IncompatFeatures { - return IncompatFeatures{ - DirentFileType: f&SbDirentFileType > 0, - Recovery: f&SbRecovery > 0, - JournalDev: f&SbJournalDev > 0, - MetaBG: f&SbMetaBG > 0, - Extents: f&SbExtents > 0, - Is64Bit: f&SbIs64Bit > 0, - MMP: f&SbMMP > 0, - FlexBg: f&SbFlexBg > 0, - LargeDir: f&SbLargeDir > 0, - InlineData: f&SbInlineData > 0, - Encrypted: f&SbEncrypted > 0, - } -} - -// Superblock readonly compatible features. -// This is not exhaustive, unused features are not listed. -const ( - // SbSparse indicates sparse superblocks. Only groups with number either 0 or - // a power of 3, 5, or 7 will have redundant copies of the superblock and - // block descriptors. - SbSparse = 0x1 - - // SbLargeFile indicates that this fs has been used to store a file >= 2GiB. - SbLargeFile = 0x2 - - // SbHugeFile indicates that this fs contains files whose sizes are - // represented in units of logicals blocks, not 512-byte sectors. - SbHugeFile = 0x8 - - // SbGdtCsum indicates that group descriptors have checksums. - SbGdtCsum = 0x10 - - // SbDirNlink indicates that the new subdirectory limit is 64,999. Ext3 has a - // 32,000 subdirectory limit. - SbDirNlink = 0x20 - - // SbExtraIsize indicates that large inodes exist on this filesystem. - SbExtraIsize = 0x40 - - // SbHasSnapshot indicates the existence of a snapshot. - SbHasSnapshot = 0x80 - - // SbQuota enables usage tracking for all quota types. - SbQuota = 0x100 - - // SbBigalloc maps to the bigalloc feature. When set, the minimum allocation - // unit becomes a cluster rather than a data block. Then block bitmaps track - // clusters, not data blocks. - // - // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#bigalloc. - SbBigalloc = 0x200 - - // SbMetadataCsum indicates that the fs supports metadata checksumming. - SbMetadataCsum = 0x400 - - // SbReadOnly marks this filesystem as readonly. Should refuse to mount in - // read/write mode. - SbReadOnly = 0x1000 -) - -// RoCompatFeatures represents a superblock's readonly compatible feature set. -// If the kernel does not understand any of these feature, it can still mount -// readonly. But if the user wants to mount read/write, the kernel should -// refuse to mount. -type RoCompatFeatures struct { - Sparse bool - LargeFile bool - HugeFile bool - GdtCsum bool - DirNlink bool - ExtraIsize bool - HasSnapshot bool - Quota bool - Bigalloc bool - MetadataCsum bool - ReadOnly bool -} - -// ToInt converts superblock readonly compatible features to its 32-bit rep. -func (f RoCompatFeatures) ToInt() uint32 { - var res uint32 - - if f.Sparse { - res |= SbSparse - } - if f.LargeFile { - res |= SbLargeFile - } - if f.HugeFile { - res |= SbHugeFile - } - if f.GdtCsum { - res |= SbGdtCsum - } - if f.DirNlink { - res |= SbDirNlink - } - if f.ExtraIsize { - res |= SbExtraIsize - } - if f.HasSnapshot { - res |= SbHasSnapshot - } - if f.Quota { - res |= SbQuota - } - if f.Bigalloc { - res |= SbBigalloc - } - if f.MetadataCsum { - res |= SbMetadataCsum - } - if f.ReadOnly { - res |= SbReadOnly - } - - return res -} - -// RoCompatFeaturesFromInt converts the integer representation of superblock -// readonly compatible features to RoCompatFeatures struct. -func RoCompatFeaturesFromInt(f uint32) RoCompatFeatures { - return RoCompatFeatures{ - Sparse: f&SbSparse > 0, - LargeFile: f&SbLargeFile > 0, - HugeFile: f&SbHugeFile > 0, - GdtCsum: f&SbGdtCsum > 0, - DirNlink: f&SbDirNlink > 0, - ExtraIsize: f&SbExtraIsize > 0, - HasSnapshot: f&SbHasSnapshot > 0, - Quota: f&SbQuota > 0, - Bigalloc: f&SbBigalloc > 0, - MetadataCsum: f&SbMetadataCsum > 0, - ReadOnly: f&SbReadOnly > 0, - } -} diff --git a/pkg/sentry/fs/ext/disklayout/superblock_32.go b/pkg/sentry/fs/ext/disklayout/superblock_32.go deleted file mode 100644 index 53e515fd3..000000000 --- a/pkg/sentry/fs/ext/disklayout/superblock_32.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// SuperBlock32Bit implements SuperBlock and represents the 32-bit version of -// the ext4_super_block struct in fs/ext4/ext4.h. Should be used only if -// RevLevel = DynamicRev and 64-bit feature is disabled. -type SuperBlock32Bit struct { - // We embed the old superblock struct here because the 32-bit version is just - // an extension of the old version. - SuperBlockOld - - FirstInode uint32 - InodeSizeRaw uint16 - BlockGroupNumber uint16 - FeatureCompat uint32 - FeatureIncompat uint32 - FeatureRoCompat uint32 - UUID [16]byte - VolumeName [16]byte - LastMounted [64]byte - AlgoUsageBitmap uint32 - PreallocBlocks uint8 - PreallocDirBlocks uint8 - ReservedGdtBlocks uint16 - JournalUUID [16]byte - JournalInum uint32 - JournalDev uint32 - LastOrphan uint32 - HashSeed [4]uint32 - DefaultHashVersion uint8 - JnlBackupType uint8 - BgDescSizeRaw uint16 - DefaultMountOpts uint32 - FirstMetaBg uint32 - MkfsTime uint32 - JnlBlocks [17]uint32 -} - -// Compiles only if SuperBlock32Bit implements SuperBlock. -var _ SuperBlock = (*SuperBlock32Bit)(nil) - -// Only override methods which change based on the additional fields above. -// Not overriding SuperBlock.BgDescSize because it would still return 32 here. - -// InodeSize implements SuperBlock.InodeSize. -func (sb *SuperBlock32Bit) InodeSize() uint16 { - return sb.InodeSizeRaw -} - -// CompatibleFeatures implements SuperBlock.CompatibleFeatures. -func (sb *SuperBlock32Bit) CompatibleFeatures() CompatFeatures { - return CompatFeaturesFromInt(sb.FeatureCompat) -} - -// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures. -func (sb *SuperBlock32Bit) IncompatibleFeatures() IncompatFeatures { - return IncompatFeaturesFromInt(sb.FeatureIncompat) -} - -// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures. -func (sb *SuperBlock32Bit) ReadOnlyCompatibleFeatures() RoCompatFeatures { - return RoCompatFeaturesFromInt(sb.FeatureRoCompat) -} diff --git a/pkg/sentry/fs/ext/disklayout/superblock_64.go b/pkg/sentry/fs/ext/disklayout/superblock_64.go deleted file mode 100644 index 7c1053fb4..000000000 --- a/pkg/sentry/fs/ext/disklayout/superblock_64.go +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// SuperBlock64Bit implements SuperBlock and represents the 64-bit version of -// the ext4_super_block struct in fs/ext4/ext4.h. This sums up to be exactly -// 1024 bytes (smallest possible block size) and hence the superblock always -// fits in no more than one data block. Should only be used when the 64-bit -// feature is set. -type SuperBlock64Bit struct { - // We embed the 32-bit struct here because 64-bit version is just an extension - // of the 32-bit version. - SuperBlock32Bit - - BlocksCountHi uint32 - ReservedBlocksCountHi uint32 - FreeBlocksCountHi uint32 - MinInodeSize uint16 - WantInodeSize uint16 - Flags uint32 - RaidStride uint16 - MmpInterval uint16 - MmpBlock uint64 - RaidStripeWidth uint32 - LogGroupsPerFlex uint8 - ChecksumType uint8 - _ uint16 - KbytesWritten uint64 - SnapshotInum uint32 - SnapshotID uint32 - SnapshotRsrvBlocksCount uint64 - SnapshotList uint32 - ErrorCount uint32 - FirstErrorTime uint32 - FirstErrorInode uint32 - FirstErrorBlock uint64 - FirstErrorFunction [32]byte - FirstErrorLine uint32 - LastErrorTime uint32 - LastErrorInode uint32 - LastErrorLine uint32 - LastErrorBlock uint64 - LastErrorFunction [32]byte - MountOpts [64]byte - UserQuotaInum uint32 - GroupQuotaInum uint32 - OverheadBlocks uint32 - BackupBgs [2]uint32 - EncryptAlgos [4]uint8 - EncryptPwSalt [16]uint8 - LostFoundInode uint32 - ProjectQuotaInode uint32 - ChecksumSeed uint32 - WtimeHi uint8 - MtimeHi uint8 - MkfsTimeHi uint8 - LastCheckHi uint8 - FirstErrorTimeHi uint8 - LastErrorTimeHi uint8 - _ [2]uint8 - Encoding uint16 - EncodingFlags uint16 - _ [95]uint32 - Checksum uint32 -} - -// Compiles only if SuperBlock64Bit implements SuperBlock. -var _ SuperBlock = (*SuperBlock64Bit)(nil) - -// Only override methods which change based on the 64-bit feature. - -// BlocksCount implements SuperBlock.BlocksCount. -func (sb *SuperBlock64Bit) BlocksCount() uint64 { - return (uint64(sb.BlocksCountHi) << 32) | uint64(sb.BlocksCountLo) -} - -// FreeBlocksCount implements SuperBlock.FreeBlocksCount. -func (sb *SuperBlock64Bit) FreeBlocksCount() uint64 { - return (uint64(sb.FreeBlocksCountHi) << 32) | uint64(sb.FreeBlocksCountLo) -} - -// BgDescSize implements SuperBlock.BgDescSize. -func (sb *SuperBlock64Bit) BgDescSize() uint16 { return sb.BgDescSizeRaw } diff --git a/pkg/sentry/fs/ext/disklayout/superblock_old.go b/pkg/sentry/fs/ext/disklayout/superblock_old.go deleted file mode 100644 index 9221e0251..000000000 --- a/pkg/sentry/fs/ext/disklayout/superblock_old.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -// SuperBlockOld implements SuperBlock and represents the old version of the -// superblock struct. Should be used only if RevLevel = OldRev. -type SuperBlockOld struct { - InodesCountRaw uint32 - BlocksCountLo uint32 - ReservedBlocksCount uint32 - FreeBlocksCountLo uint32 - FreeInodesCountRaw uint32 - FirstDataBlockRaw uint32 - LogBlockSize uint32 - LogClusterSize uint32 - BlocksPerGroupRaw uint32 - ClustersPerGroupRaw uint32 - InodesPerGroupRaw uint32 - Mtime uint32 - Wtime uint32 - MountCountRaw uint16 - MaxMountCountRaw uint16 - MagicRaw uint16 - State uint16 - Errors uint16 - MinorRevLevel uint16 - LastCheck uint32 - CheckInterval uint32 - CreatorOS uint32 - RevLevel uint32 - DefResUID uint16 - DefResGID uint16 -} - -// Compiles only if SuperBlockOld implements SuperBlock. -var _ SuperBlock = (*SuperBlockOld)(nil) - -// InodesCount implements SuperBlock.InodesCount. -func (sb *SuperBlockOld) InodesCount() uint32 { return sb.InodesCountRaw } - -// BlocksCount implements SuperBlock.BlocksCount. -func (sb *SuperBlockOld) BlocksCount() uint64 { return uint64(sb.BlocksCountLo) } - -// FreeBlocksCount implements SuperBlock.FreeBlocksCount. -func (sb *SuperBlockOld) FreeBlocksCount() uint64 { return uint64(sb.FreeBlocksCountLo) } - -// FreeInodesCount implements SuperBlock.FreeInodesCount. -func (sb *SuperBlockOld) FreeInodesCount() uint32 { return sb.FreeInodesCountRaw } - -// MountCount implements SuperBlock.MountCount. -func (sb *SuperBlockOld) MountCount() uint16 { return sb.MountCountRaw } - -// MaxMountCount implements SuperBlock.MaxMountCount. -func (sb *SuperBlockOld) MaxMountCount() uint16 { return sb.MaxMountCountRaw } - -// FirstDataBlock implements SuperBlock.FirstDataBlock. -func (sb *SuperBlockOld) FirstDataBlock() uint32 { return sb.FirstDataBlockRaw } - -// BlockSize implements SuperBlock.BlockSize. -func (sb *SuperBlockOld) BlockSize() uint64 { return 1 << (10 + sb.LogBlockSize) } - -// BlocksPerGroup implements SuperBlock.BlocksPerGroup. -func (sb *SuperBlockOld) BlocksPerGroup() uint32 { return sb.BlocksPerGroupRaw } - -// ClusterSize implements SuperBlock.ClusterSize. -func (sb *SuperBlockOld) ClusterSize() uint64 { return 1 << (10 + sb.LogClusterSize) } - -// ClustersPerGroup implements SuperBlock.ClustersPerGroup. -func (sb *SuperBlockOld) ClustersPerGroup() uint32 { return sb.ClustersPerGroupRaw } - -// InodeSize implements SuperBlock.InodeSize. -func (sb *SuperBlockOld) InodeSize() uint16 { return OldInodeSize } - -// InodesPerGroup implements SuperBlock.InodesPerGroup. -func (sb *SuperBlockOld) InodesPerGroup() uint32 { return sb.InodesPerGroupRaw } - -// BgDescSize implements SuperBlock.BgDescSize. -func (sb *SuperBlockOld) BgDescSize() uint16 { return 32 } - -// CompatibleFeatures implements SuperBlock.CompatibleFeatures. -func (sb *SuperBlockOld) CompatibleFeatures() CompatFeatures { return CompatFeatures{} } - -// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures. -func (sb *SuperBlockOld) IncompatibleFeatures() IncompatFeatures { return IncompatFeatures{} } - -// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures. -func (sb *SuperBlockOld) ReadOnlyCompatibleFeatures() RoCompatFeatures { return RoCompatFeatures{} } - -// Magic implements SuperBlock.Magic. -func (sb *SuperBlockOld) Magic() uint16 { return sb.MagicRaw } - -// Revision implements SuperBlock.Revision. -func (sb *SuperBlockOld) Revision() SbRevision { return SbRevision(sb.RevLevel) } diff --git a/pkg/sentry/fs/ext/disklayout/superblock_test.go b/pkg/sentry/fs/ext/disklayout/superblock_test.go deleted file mode 100644 index 463b5ba21..000000000 --- a/pkg/sentry/fs/ext/disklayout/superblock_test.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "testing" -) - -// TestSuperBlockSize tests that the superblock structs are of the correct -// size. -func TestSuperBlockSize(t *testing.T) { - assertSize(t, SuperBlockOld{}, 84) - assertSize(t, SuperBlock32Bit{}, 336) - assertSize(t, SuperBlock64Bit{}, 1024) -} diff --git a/pkg/sentry/fs/ext/disklayout/test_utils.go b/pkg/sentry/fs/ext/disklayout/test_utils.go deleted file mode 100644 index 9c63f04c0..000000000 --- a/pkg/sentry/fs/ext/disklayout/test_utils.go +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package disklayout - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/binary" -) - -func assertSize(t *testing.T, v interface{}, want uintptr) { - t.Helper() - - if got := binary.Size(v); got != want { - t.Errorf("struct %s should be exactly %d bytes but is %d bytes", reflect.TypeOf(v).Name(), want, got) - } -} diff --git a/pkg/sentry/fs/ext/ext.go b/pkg/sentry/fs/ext/ext.go deleted file mode 100644 index c3e2c9efb..000000000 --- a/pkg/sentry/fs/ext/ext.go +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package ext implements readonly ext(2/3/4) filesystems. -package ext - -import ( - "errors" - "fmt" - "io" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// filesystemType implements vfs.FilesystemType. -type filesystemType struct{} - -// Compiles only if filesystemType implements vfs.FilesystemType. -var _ vfs.FilesystemType = (*filesystemType)(nil) - -// getDeviceFd returns an io.ReaderAt to the underlying device. -// Currently there are two ways of mounting an ext(2/3/4) fs: -// 1. Specify a mount with our internal special MountType in the OCI spec. -// 2. Expose the device to the container and mount it from application layer. -func getDeviceFd(source string, opts vfs.NewFilesystemOptions) (io.ReaderAt, error) { - if opts.InternalData == nil { - // User mount call. - // TODO(b/134676337): Open the device specified by `source` and return that. - panic("unimplemented") - } - - // NewFilesystem call originated from within the sentry. - devFd, ok := opts.InternalData.(int) - if !ok { - return nil, errors.New("internal data for ext fs must be an int containing the file descriptor to device") - } - - if devFd < 0 { - return nil, fmt.Errorf("ext device file descriptor is not valid: %d", devFd) - } - - // The fd.ReadWriter returned from fd.NewReadWriter() does not take ownership - // of the file descriptor and hence will not close it when it is garbage - // collected. - return fd.NewReadWriter(devFd), nil -} - -// isCompatible checks if the superblock has feature sets which are compatible. -// We only need to check the superblock incompatible feature set since we are -// mounting readonly. We will also need to check readonly compatible feature -// set when mounting for read/write. -func isCompatible(sb disklayout.SuperBlock) bool { - // Please note that what is being checked is limited based on the fact that we - // are mounting readonly and that we are not journaling. When mounting - // read/write or with a journal, this must be reevaluated. - incompatFeatures := sb.IncompatibleFeatures() - if incompatFeatures.MetaBG { - log.Warningf("ext fs: meta block groups are not supported") - return false - } - if incompatFeatures.MMP { - log.Warningf("ext fs: multiple mount protection is not supported") - return false - } - if incompatFeatures.Encrypted { - log.Warningf("ext fs: encrypted inodes not supported") - return false - } - if incompatFeatures.InlineData { - log.Warningf("ext fs: inline files not supported") - return false - } - return true -} - -// NewFilesystem implements vfs.FilesystemType.NewFilesystem. -func (fstype filesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts vfs.NewFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - // TODO(b/134676337): Ensure that the user is mounting readonly. If not, - // EACCESS should be returned according to mount(2). Filesystem independent - // flags (like readonly) are currently not available in pkg/sentry/vfs. - - dev, err := getDeviceFd(source, opts) - if err != nil { - return nil, nil, err - } - - fs := filesystem{dev: dev, inodeCache: make(map[uint32]*inode)} - fs.vfsfs.Init(&fs) - fs.sb, err = readSuperBlock(dev) - if err != nil { - return nil, nil, err - } - - if fs.sb.Magic() != linux.EXT_SUPER_MAGIC { - // mount(2) specifies that EINVAL should be returned if the superblock is - // invalid. - return nil, nil, syserror.EINVAL - } - - // Refuse to mount if the filesystem is incompatible. - if !isCompatible(fs.sb) { - return nil, nil, syserror.EINVAL - } - - fs.bgs, err = readBlockGroups(dev, fs.sb) - if err != nil { - return nil, nil, err - } - - rootInode, err := fs.getOrCreateInodeLocked(disklayout.RootDirInode) - if err != nil { - return nil, nil, err - } - rootInode.incRef() - - return &fs.vfsfs, &newDentry(rootInode).vfsd, nil -} diff --git a/pkg/sentry/fs/ext/ext_test.go b/pkg/sentry/fs/ext/ext_test.go deleted file mode 100644 index 270f38074..000000000 --- a/pkg/sentry/fs/ext/ext_test.go +++ /dev/null @@ -1,917 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "fmt" - "io" - "os" - "path" - "sort" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/runsc/test/testutil" -) - -const ( - assetsDir = "pkg/sentry/fs/ext/assets" -) - -var ( - ext2ImagePath = path.Join(assetsDir, "tiny.ext2") - ext3ImagePath = path.Join(assetsDir, "tiny.ext3") - ext4ImagePath = path.Join(assetsDir, "tiny.ext4") -) - -// setUp opens imagePath as an ext Filesystem and returns all necessary -// elements required to run tests. If error is non-nil, it also returns a tear -// down function which must be called after the test is run for clean up. -func setUp(t *testing.T, imagePath string) (context.Context, *vfs.VirtualFilesystem, *vfs.VirtualDentry, func(), error) { - localImagePath, err := testutil.FindFile(imagePath) - if err != nil { - return nil, nil, nil, nil, fmt.Errorf("failed to open local image at path %s: %v", imagePath, err) - } - - f, err := os.Open(localImagePath) - if err != nil { - return nil, nil, nil, nil, err - } - - ctx := contexttest.Context(t) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("extfs", filesystemType{}) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, localImagePath, "extfs", &vfs.NewFilesystemOptions{InternalData: int(f.Fd())}) - if err != nil { - f.Close() - return nil, nil, nil, nil, err - } - - root := mntns.Root() - - tearDown := func() { - root.DecRef() - - if err := f.Close(); err != nil { - t.Fatalf("tearDown failed: %v", err) - } - } - return ctx, vfsObj, &root, tearDown, nil -} - -// TODO(b/134676337): Test vfs.FilesystemImpl.ReadlinkAt and -// vfs.FilesystemImpl.StatFSAt which are not implemented in -// vfs.VirtualFilesystem yet. - -// TestSeek tests vfs.FileDescriptionImpl.Seek functionality. -func TestSeek(t *testing.T) { - type seekTest struct { - name string - image string - path string - } - - tests := []seekTest{ - { - name: "ext4 root dir seek", - image: ext4ImagePath, - path: "/", - }, - { - name: "ext3 root dir seek", - image: ext3ImagePath, - path: "/", - }, - { - name: "ext2 root dir seek", - image: ext2ImagePath, - path: "/", - }, - { - name: "ext4 reg file seek", - image: ext4ImagePath, - path: "/file.txt", - }, - { - name: "ext3 reg file seek", - image: ext3ImagePath, - path: "/file.txt", - }, - { - name: "ext2 reg file seek", - image: ext2ImagePath, - path: "/file.txt", - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ctx, vfsfs, root, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - fd, err := vfsfs.OpenAt( - ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path}, - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt failed: %v", err) - } - - if n, err := fd.Impl().Seek(ctx, 0, linux.SEEK_SET); n != 0 || err != nil { - t.Errorf("expected seek position 0, got %d and error %v", n, err) - } - - stat, err := fd.Impl().Stat(ctx, vfs.StatOptions{}) - if err != nil { - t.Errorf("fd.stat failed for file %s in image %s: %v", test.path, test.image, err) - } - - // We should be able to seek beyond the end of file. - size := int64(stat.Size) - if n, err := fd.Impl().Seek(ctx, size, linux.SEEK_SET); n != size || err != nil { - t.Errorf("expected seek position %d, got %d and error %v", size, n, err) - } - - // EINVAL should be returned if the resulting offset is negative. - if _, err := fd.Impl().Seek(ctx, -1, linux.SEEK_SET); err != syserror.EINVAL { - t.Errorf("expected error EINVAL but got %v", err) - } - - if n, err := fd.Impl().Seek(ctx, 3, linux.SEEK_CUR); n != size+3 || err != nil { - t.Errorf("expected seek position %d, got %d and error %v", size+3, n, err) - } - - // Make sure negative offsets work with SEEK_CUR. - if n, err := fd.Impl().Seek(ctx, -2, linux.SEEK_CUR); n != size+1 || err != nil { - t.Errorf("expected seek position %d, got %d and error %v", size+1, n, err) - } - - // EINVAL should be returned if the resulting offset is negative. - if _, err := fd.Impl().Seek(ctx, -(size + 2), linux.SEEK_CUR); err != syserror.EINVAL { - t.Errorf("expected error EINVAL but got %v", err) - } - - // Make sure SEEK_END works with regular files. - switch fd.Impl().(type) { - case *regularFileFD: - // Seek back to 0. - if n, err := fd.Impl().Seek(ctx, -size, linux.SEEK_END); n != 0 || err != nil { - t.Errorf("expected seek position %d, got %d and error %v", 0, n, err) - } - - // Seek forward beyond EOF. - if n, err := fd.Impl().Seek(ctx, 1, linux.SEEK_END); n != size+1 || err != nil { - t.Errorf("expected seek position %d, got %d and error %v", size+1, n, err) - } - - // EINVAL should be returned if the resulting offset is negative. - if _, err := fd.Impl().Seek(ctx, -(size + 1), linux.SEEK_END); err != syserror.EINVAL { - t.Errorf("expected error EINVAL but got %v", err) - } - } - }) - } -} - -// TestStatAt tests filesystem.StatAt functionality. -func TestStatAt(t *testing.T) { - type statAtTest struct { - name string - image string - path string - want linux.Statx - } - - tests := []statAtTest{ - { - name: "ext4 statx small file", - image: ext4ImagePath, - path: "/file.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13, - }, - }, - { - name: "ext3 statx small file", - image: ext3ImagePath, - path: "/file.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13, - }, - }, - { - name: "ext2 statx small file", - image: ext2ImagePath, - path: "/file.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13, - }, - }, - { - name: "ext4 statx big file", - image: ext4ImagePath, - path: "/bigfile.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13042, - }, - }, - { - name: "ext3 statx big file", - image: ext3ImagePath, - path: "/bigfile.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13042, - }, - }, - { - name: "ext2 statx big file", - image: ext2ImagePath, - path: "/bigfile.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0644 | linux.ModeRegular, - Size: 13042, - }, - }, - { - name: "ext4 statx symlink file", - image: ext4ImagePath, - path: "/symlink.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0777 | linux.ModeSymlink, - Size: 8, - }, - }, - { - name: "ext3 statx symlink file", - image: ext3ImagePath, - path: "/symlink.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0777 | linux.ModeSymlink, - Size: 8, - }, - }, - { - name: "ext2 statx symlink file", - image: ext2ImagePath, - path: "/symlink.txt", - want: linux.Statx{ - Blksize: 0x400, - Nlink: 1, - UID: 0, - GID: 0, - Mode: 0777 | linux.ModeSymlink, - Size: 8, - }, - }, - } - - // Ignore the fields that are not supported by filesystem.StatAt yet and - // those which are likely to change as the image does. - ignoredFields := map[string]bool{ - "Attributes": true, - "AttributesMask": true, - "Atime": true, - "Blocks": true, - "Btime": true, - "Ctime": true, - "DevMajor": true, - "DevMinor": true, - "Ino": true, - "Mask": true, - "Mtime": true, - "RdevMajor": true, - "RdevMinor": true, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ctx, vfsfs, root, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - got, err := vfsfs.StatAt(ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path}, - &vfs.StatOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.StatAt failed for file %s in image %s: %v", test.path, test.image, err) - } - - cmpIgnoreFields := cmp.FilterPath(func(p cmp.Path) bool { - _, ok := ignoredFields[p.String()] - return ok - }, cmp.Ignore()) - if diff := cmp.Diff(got, test.want, cmpIgnoreFields, cmpopts.IgnoreUnexported(linux.Statx{})); diff != "" { - t.Errorf("stat mismatch (-want +got):\n%s", diff) - } - }) - } -} - -// TestRead tests the read functionality for vfs file descriptions. -func TestRead(t *testing.T) { - type readTest struct { - name string - image string - absPath string - } - - tests := []readTest{ - { - name: "ext4 read small file", - image: ext4ImagePath, - absPath: "/file.txt", - }, - { - name: "ext3 read small file", - image: ext3ImagePath, - absPath: "/file.txt", - }, - { - name: "ext2 read small file", - image: ext2ImagePath, - absPath: "/file.txt", - }, - { - name: "ext4 read big file", - image: ext4ImagePath, - absPath: "/bigfile.txt", - }, - { - name: "ext3 read big file", - image: ext3ImagePath, - absPath: "/bigfile.txt", - }, - { - name: "ext2 read big file", - image: ext2ImagePath, - absPath: "/bigfile.txt", - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ctx, vfsfs, root, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - fd, err := vfsfs.OpenAt( - ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.absPath}, - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt failed: %v", err) - } - - // Get a local file descriptor and compare its functionality with a vfs file - // description for the same file. - localFile, err := testutil.FindFile(path.Join(assetsDir, test.absPath)) - if err != nil { - t.Fatalf("testutil.FindFile failed for %s: %v", test.absPath, err) - } - - f, err := os.Open(localFile) - if err != nil { - t.Fatalf("os.Open failed for %s: %v", localFile, err) - } - defer f.Close() - - // Read the entire file by reading one byte repeatedly. Doing this stress - // tests the underlying file reader implementation. - got := make([]byte, 1) - want := make([]byte, 1) - for { - n, err := f.Read(want) - fd.Impl().Read(ctx, usermem.BytesIOSequence(got), vfs.ReadOptions{}) - - if diff := cmp.Diff(got, want); diff != "" { - t.Errorf("file data mismatch (-want +got):\n%s", diff) - } - - // Make sure there is no more file data left after getting EOF. - if n == 0 || err == io.EOF { - if n, _ := fd.Impl().Read(ctx, usermem.BytesIOSequence(got), vfs.ReadOptions{}); n != 0 { - t.Errorf("extra unexpected file data in file %s in image %s", test.absPath, test.image) - } - - break - } - - if err != nil { - t.Fatalf("read failed: %v", err) - } - } - }) - } -} - -// iterDirentsCb is a simple callback which just keeps adding the dirents to an -// internal list. Implements vfs.IterDirentsCallback. -type iterDirentsCb struct { - dirents []vfs.Dirent -} - -// Compiles only if iterDirentCb implements vfs.IterDirentsCallback. -var _ vfs.IterDirentsCallback = (*iterDirentsCb)(nil) - -// newIterDirentsCb is the iterDirent -func newIterDirentCb() *iterDirentsCb { - return &iterDirentsCb{dirents: make([]vfs.Dirent, 0)} -} - -// Handle implements vfs.IterDirentsCallback.Handle. -func (cb *iterDirentsCb) Handle(dirent vfs.Dirent) bool { - cb.dirents = append(cb.dirents, dirent) - return true -} - -// TestIterDirents tests the FileDescriptionImpl.IterDirents functionality. -func TestIterDirents(t *testing.T) { - type iterDirentTest struct { - name string - image string - path string - want []vfs.Dirent - } - - wantDirents := []vfs.Dirent{ - vfs.Dirent{ - Name: ".", - Type: linux.DT_DIR, - }, - vfs.Dirent{ - Name: "..", - Type: linux.DT_DIR, - }, - vfs.Dirent{ - Name: "lost+found", - Type: linux.DT_DIR, - }, - vfs.Dirent{ - Name: "file.txt", - Type: linux.DT_REG, - }, - vfs.Dirent{ - Name: "bigfile.txt", - Type: linux.DT_REG, - }, - vfs.Dirent{ - Name: "symlink.txt", - Type: linux.DT_LNK, - }, - } - tests := []iterDirentTest{ - { - name: "ext4 root dir iteration", - image: ext4ImagePath, - path: "/", - want: wantDirents, - }, - { - name: "ext3 root dir iteration", - image: ext3ImagePath, - path: "/", - want: wantDirents, - }, - { - name: "ext2 root dir iteration", - image: ext2ImagePath, - path: "/", - want: wantDirents, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ctx, vfsfs, root, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - fd, err := vfsfs.OpenAt( - ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path}, - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt failed: %v", err) - } - - cb := &iterDirentsCb{} - if err = fd.Impl().IterDirents(ctx, cb); err != nil { - t.Fatalf("dir fd.IterDirents() failed: %v", err) - } - - sort.Slice(cb.dirents, func(i int, j int) bool { return cb.dirents[i].Name < cb.dirents[j].Name }) - sort.Slice(test.want, func(i int, j int) bool { return test.want[i].Name < test.want[j].Name }) - - // Ignore the inode number and offset of dirents because those are likely to - // change as the underlying image changes. - cmpIgnoreFields := cmp.FilterPath(func(p cmp.Path) bool { - return p.String() == "Ino" || p.String() == "Off" - }, cmp.Ignore()) - if diff := cmp.Diff(cb.dirents, test.want, cmpIgnoreFields); diff != "" { - t.Errorf("dirents mismatch (-want +got):\n%s", diff) - } - }) - } -} - -// TestRootDir tests that the root directory inode is correctly initialized and -// returned from setUp. -func TestRootDir(t *testing.T) { - type inodeProps struct { - Mode linux.FileMode - UID auth.KUID - GID auth.KGID - Size uint64 - InodeSize uint16 - Links uint16 - Flags disklayout.InodeFlags - } - - type rootDirTest struct { - name string - image string - wantInode inodeProps - } - - tests := []rootDirTest{ - { - name: "ext4 root dir", - image: ext4ImagePath, - wantInode: inodeProps{ - Mode: linux.ModeDirectory | 0755, - Size: 0x400, - InodeSize: 0x80, - Links: 3, - Flags: disklayout.InodeFlags{Extents: true}, - }, - }, - { - name: "ext3 root dir", - image: ext3ImagePath, - wantInode: inodeProps{ - Mode: linux.ModeDirectory | 0755, - Size: 0x400, - InodeSize: 0x80, - Links: 3, - }, - }, - { - name: "ext2 root dir", - image: ext2ImagePath, - wantInode: inodeProps{ - Mode: linux.ModeDirectory | 0755, - Size: 0x400, - InodeSize: 0x80, - Links: 3, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - _, _, vd, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - d, ok := vd.Dentry().Impl().(*dentry) - if !ok { - t.Fatalf("ext dentry of incorrect type: %T", vd.Dentry().Impl()) - } - - // Offload inode contents into local structs for comparison. - gotInode := inodeProps{ - Mode: d.inode.diskInode.Mode(), - UID: d.inode.diskInode.UID(), - GID: d.inode.diskInode.GID(), - Size: d.inode.diskInode.Size(), - InodeSize: d.inode.diskInode.InodeSize(), - Links: d.inode.diskInode.LinksCount(), - Flags: d.inode.diskInode.Flags(), - } - - if diff := cmp.Diff(gotInode, test.wantInode); diff != "" { - t.Errorf("inode mismatch (-want +got):\n%s", diff) - } - }) - } -} - -// TestFilesystemInit tests that the filesystem superblock and block group -// descriptors are correctly read in and initialized. -func TestFilesystemInit(t *testing.T) { - // sb only contains the immutable properties of the superblock. - type sb struct { - InodesCount uint32 - BlocksCount uint64 - MaxMountCount uint16 - FirstDataBlock uint32 - BlockSize uint64 - BlocksPerGroup uint32 - ClusterSize uint64 - ClustersPerGroup uint32 - InodeSize uint16 - InodesPerGroup uint32 - BgDescSize uint16 - Magic uint16 - Revision disklayout.SbRevision - CompatFeatures disklayout.CompatFeatures - IncompatFeatures disklayout.IncompatFeatures - RoCompatFeatures disklayout.RoCompatFeatures - } - - // bg only contains the immutable properties of the block group descriptor. - type bg struct { - InodeTable uint64 - BlockBitmap uint64 - InodeBitmap uint64 - ExclusionBitmap uint64 - Flags disklayout.BGFlags - } - - type fsInitTest struct { - name string - image string - wantSb sb - wantBgs []bg - } - - tests := []fsInitTest{ - { - name: "ext4 filesystem init", - image: ext4ImagePath, - wantSb: sb{ - InodesCount: 0x10, - BlocksCount: 0x40, - MaxMountCount: 0xffff, - FirstDataBlock: 0x1, - BlockSize: 0x400, - BlocksPerGroup: 0x2000, - ClusterSize: 0x400, - ClustersPerGroup: 0x2000, - InodeSize: 0x80, - InodesPerGroup: 0x10, - BgDescSize: 0x40, - Magic: linux.EXT_SUPER_MAGIC, - Revision: disklayout.DynamicRev, - CompatFeatures: disklayout.CompatFeatures{ - ExtAttr: true, - ResizeInode: true, - DirIndex: true, - }, - IncompatFeatures: disklayout.IncompatFeatures{ - DirentFileType: true, - Extents: true, - Is64Bit: true, - FlexBg: true, - }, - RoCompatFeatures: disklayout.RoCompatFeatures{ - Sparse: true, - LargeFile: true, - HugeFile: true, - DirNlink: true, - ExtraIsize: true, - MetadataCsum: true, - }, - }, - wantBgs: []bg{ - { - InodeTable: 0x23, - BlockBitmap: 0x3, - InodeBitmap: 0x13, - Flags: disklayout.BGFlags{ - InodeZeroed: true, - }, - }, - }, - }, - { - name: "ext3 filesystem init", - image: ext3ImagePath, - wantSb: sb{ - InodesCount: 0x10, - BlocksCount: 0x40, - MaxMountCount: 0xffff, - FirstDataBlock: 0x1, - BlockSize: 0x400, - BlocksPerGroup: 0x2000, - ClusterSize: 0x400, - ClustersPerGroup: 0x2000, - InodeSize: 0x80, - InodesPerGroup: 0x10, - BgDescSize: 0x20, - Magic: linux.EXT_SUPER_MAGIC, - Revision: disklayout.DynamicRev, - CompatFeatures: disklayout.CompatFeatures{ - ExtAttr: true, - ResizeInode: true, - DirIndex: true, - }, - IncompatFeatures: disklayout.IncompatFeatures{ - DirentFileType: true, - }, - RoCompatFeatures: disklayout.RoCompatFeatures{ - Sparse: true, - LargeFile: true, - }, - }, - wantBgs: []bg{ - { - InodeTable: 0x5, - BlockBitmap: 0x3, - InodeBitmap: 0x4, - Flags: disklayout.BGFlags{ - InodeZeroed: true, - }, - }, - }, - }, - { - name: "ext2 filesystem init", - image: ext2ImagePath, - wantSb: sb{ - InodesCount: 0x10, - BlocksCount: 0x40, - MaxMountCount: 0xffff, - FirstDataBlock: 0x1, - BlockSize: 0x400, - BlocksPerGroup: 0x2000, - ClusterSize: 0x400, - ClustersPerGroup: 0x2000, - InodeSize: 0x80, - InodesPerGroup: 0x10, - BgDescSize: 0x20, - Magic: linux.EXT_SUPER_MAGIC, - Revision: disklayout.DynamicRev, - CompatFeatures: disklayout.CompatFeatures{ - ExtAttr: true, - ResizeInode: true, - DirIndex: true, - }, - IncompatFeatures: disklayout.IncompatFeatures{ - DirentFileType: true, - }, - RoCompatFeatures: disklayout.RoCompatFeatures{ - Sparse: true, - LargeFile: true, - }, - }, - wantBgs: []bg{ - { - InodeTable: 0x5, - BlockBitmap: 0x3, - InodeBitmap: 0x4, - Flags: disklayout.BGFlags{ - InodeZeroed: true, - }, - }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - _, _, vd, tearDown, err := setUp(t, test.image) - if err != nil { - t.Fatalf("setUp failed: %v", err) - } - defer tearDown() - - fs, ok := vd.Mount().Filesystem().Impl().(*filesystem) - if !ok { - t.Fatalf("ext filesystem of incorrect type: %T", vd.Mount().Filesystem().Impl()) - } - - // Offload superblock and block group descriptors contents into - // local structs for comparison. - totalFreeInodes := uint32(0) - totalFreeBlocks := uint64(0) - gotSb := sb{ - InodesCount: fs.sb.InodesCount(), - BlocksCount: fs.sb.BlocksCount(), - MaxMountCount: fs.sb.MaxMountCount(), - FirstDataBlock: fs.sb.FirstDataBlock(), - BlockSize: fs.sb.BlockSize(), - BlocksPerGroup: fs.sb.BlocksPerGroup(), - ClusterSize: fs.sb.ClusterSize(), - ClustersPerGroup: fs.sb.ClustersPerGroup(), - InodeSize: fs.sb.InodeSize(), - InodesPerGroup: fs.sb.InodesPerGroup(), - BgDescSize: fs.sb.BgDescSize(), - Magic: fs.sb.Magic(), - Revision: fs.sb.Revision(), - CompatFeatures: fs.sb.CompatibleFeatures(), - IncompatFeatures: fs.sb.IncompatibleFeatures(), - RoCompatFeatures: fs.sb.ReadOnlyCompatibleFeatures(), - } - gotNumBgs := len(fs.bgs) - gotBgs := make([]bg, gotNumBgs) - for i := 0; i < gotNumBgs; i++ { - gotBgs[i].InodeTable = fs.bgs[i].InodeTable() - gotBgs[i].BlockBitmap = fs.bgs[i].BlockBitmap() - gotBgs[i].InodeBitmap = fs.bgs[i].InodeBitmap() - gotBgs[i].ExclusionBitmap = fs.bgs[i].ExclusionBitmap() - gotBgs[i].Flags = fs.bgs[i].Flags() - - totalFreeInodes += fs.bgs[i].FreeInodesCount() - totalFreeBlocks += uint64(fs.bgs[i].FreeBlocksCount()) - } - - if diff := cmp.Diff(gotSb, test.wantSb); diff != "" { - t.Errorf("superblock mismatch (-want +got):\n%s", diff) - } - - if diff := cmp.Diff(gotBgs, test.wantBgs); diff != "" { - t.Errorf("block group descriptors mismatch (-want +got):\n%s", diff) - } - - if diff := cmp.Diff(totalFreeInodes, fs.sb.FreeInodesCount()); diff != "" { - t.Errorf("total free inodes mismatch (-want +got):\n%s", diff) - } - - if diff := cmp.Diff(totalFreeBlocks, fs.sb.FreeBlocksCount()); diff != "" { - t.Errorf("total free blocks mismatch (-want +got):\n%s", diff) - } - }) - } -} diff --git a/pkg/sentry/fs/ext/extent_file.go b/pkg/sentry/fs/ext/extent_file.go deleted file mode 100644 index 1b9bf449b..000000000 --- a/pkg/sentry/fs/ext/extent_file.go +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "io" - "sort" - - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" - "gvisor.dev/gvisor/pkg/syserror" -) - -// extentFile is a type of regular file which uses extents to store file data. -type extentFile struct { - regFile regularFile - - // root is the root extent node. This lives in the 60 byte diskInode.Data(). - // Immutable. - root disklayout.ExtentNode -} - -// Compiles only if extentFile implements io.ReaderAt. -var _ io.ReaderAt = (*extentFile)(nil) - -// newExtentFile is the extent file constructor. It reads the entire extent -// tree into memory. -// TODO(b/134676337): Build extent tree on demand to reduce memory usage. -func newExtentFile(regFile regularFile) (*extentFile, error) { - file := &extentFile{regFile: regFile} - file.regFile.impl = file - err := file.buildExtTree() - if err != nil { - return nil, err - } - return file, nil -} - -// buildExtTree builds the extent tree by reading it from disk by doing -// running a simple DFS. It first reads the root node from the inode struct in -// memory. Then it recursively builds the rest of the tree by reading it off -// disk. -// -// Precondition: inode flag InExtents must be set. -func (f *extentFile) buildExtTree() error { - rootNodeData := f.regFile.inode.diskInode.Data() - - binary.Unmarshal(rootNodeData[:disklayout.ExtentStructsSize], binary.LittleEndian, &f.root.Header) - - // Root node can not have more than 4 entries: 60 bytes = 1 header + 4 entries. - if f.root.Header.NumEntries > 4 { - // read(2) specifies that EINVAL should be returned if the file is unsuitable - // for reading. - return syserror.EINVAL - } - - f.root.Entries = make([]disklayout.ExtentEntryPair, f.root.Header.NumEntries) - for i, off := uint16(0), disklayout.ExtentStructsSize; i < f.root.Header.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize { - var curEntry disklayout.ExtentEntry - if f.root.Header.Height == 0 { - // Leaf node. - curEntry = &disklayout.Extent{} - } else { - // Internal node. - curEntry = &disklayout.ExtentIdx{} - } - binary.Unmarshal(rootNodeData[off:off+disklayout.ExtentStructsSize], binary.LittleEndian, curEntry) - f.root.Entries[i].Entry = curEntry - } - - // If this node is internal, perform DFS. - if f.root.Header.Height > 0 { - for i := uint16(0); i < f.root.Header.NumEntries; i++ { - var err error - if f.root.Entries[i].Node, err = f.buildExtTreeFromDisk(f.root.Entries[i].Entry); err != nil { - return err - } - } - } - - return nil -} - -// buildExtTreeFromDisk reads the extent tree nodes from disk and recursively -// builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to -// by the ExtentEntry. -func (f *extentFile) buildExtTreeFromDisk(entry disklayout.ExtentEntry) (*disklayout.ExtentNode, error) { - var header disklayout.ExtentHeader - off := entry.PhysicalBlock() * f.regFile.inode.blkSize - err := readFromDisk(f.regFile.inode.dev, int64(off), &header) - if err != nil { - return nil, err - } - - entries := make([]disklayout.ExtentEntryPair, header.NumEntries) - for i, off := uint16(0), off+disklayout.ExtentStructsSize; i < header.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize { - var curEntry disklayout.ExtentEntry - if header.Height == 0 { - // Leaf node. - curEntry = &disklayout.Extent{} - } else { - // Internal node. - curEntry = &disklayout.ExtentIdx{} - } - - err := readFromDisk(f.regFile.inode.dev, int64(off), curEntry) - if err != nil { - return nil, err - } - entries[i].Entry = curEntry - } - - // If this node is internal, perform DFS. - if header.Height > 0 { - for i := uint16(0); i < header.NumEntries; i++ { - var err error - entries[i].Node, err = f.buildExtTreeFromDisk(entries[i].Entry) - if err != nil { - return nil, err - } - } - } - - return &disklayout.ExtentNode{header, entries}, nil -} - -// ReadAt implements io.ReaderAt.ReadAt. -func (f *extentFile) ReadAt(dst []byte, off int64) (int, error) { - if len(dst) == 0 { - return 0, nil - } - - if off < 0 { - return 0, syserror.EINVAL - } - - if uint64(off) >= f.regFile.inode.diskInode.Size() { - return 0, io.EOF - } - - n, err := f.read(&f.root, uint64(off), dst) - if n < len(dst) && err == nil { - err = io.EOF - } - return n, err -} - -// read is the recursive step of extentFile.ReadAt which traverses the extent -// tree from the node passed and reads file data. -func (f *extentFile) read(node *disklayout.ExtentNode, off uint64, dst []byte) (int, error) { - // Perform a binary search for the node covering bytes starting at r.fileOff. - // A highly fragmented filesystem can have upto 340 entries and so linear - // search should be avoided. Finds the first entry which does not cover the - // file block we want and subtracts 1 to get the desired index. - fileBlk := uint32(off / f.regFile.inode.blkSize) - n := len(node.Entries) - found := sort.Search(n, func(i int) bool { - return node.Entries[i].Entry.FileBlock() > fileBlk - }) - 1 - - // We should be in this recursive step only if the data we want exists under - // the current node. - if found < 0 { - panic("searching for a file block in an extent entry which does not cover it") - } - - read := 0 - toRead := len(dst) - var curR int - var err error - for i := found; i < n && read < toRead; i++ { - if node.Header.Height == 0 { - curR, err = f.readFromExtent(node.Entries[i].Entry.(*disklayout.Extent), off, dst[read:]) - } else { - curR, err = f.read(node.Entries[i].Node, off, dst[read:]) - } - - read += curR - off += uint64(curR) - if err != nil { - return read, err - } - } - - return read, nil -} - -// readFromExtent reads file data from the extent. It takes advantage of the -// sequential nature of extents and reads file data from multiple blocks in one -// call. -// -// A non-nil error indicates that this is a partial read and there is probably -// more to read from this extent. The caller should propagate the error upward -// and not move to the next extent in the tree. -// -// A subsequent call to extentReader.Read should continue reading from where we -// left off as expected. -func (f *extentFile) readFromExtent(ex *disklayout.Extent, off uint64, dst []byte) (int, error) { - curFileBlk := uint32(off / f.regFile.inode.blkSize) - exFirstFileBlk := ex.FileBlock() - exLastFileBlk := exFirstFileBlk + uint32(ex.Length) // This is exclusive. - - // We should be in this recursive step only if the data we want exists under - // the current extent. - if curFileBlk < exFirstFileBlk || exLastFileBlk <= curFileBlk { - panic("searching for a file block in an extent which does not cover it") - } - - curPhyBlk := uint64(curFileBlk-exFirstFileBlk) + ex.PhysicalBlock() - readStart := curPhyBlk*f.regFile.inode.blkSize + (off % f.regFile.inode.blkSize) - - endPhyBlk := ex.PhysicalBlock() + uint64(ex.Length) - extentEnd := endPhyBlk * f.regFile.inode.blkSize // This is exclusive. - - toRead := int(extentEnd - readStart) - if len(dst) < toRead { - toRead = len(dst) - } - - n, _ := f.regFile.inode.dev.ReadAt(dst[:toRead], int64(readStart)) - if n < toRead { - return n, syserror.EIO - } - return n, nil -} diff --git a/pkg/sentry/fs/ext/extent_test.go b/pkg/sentry/fs/ext/extent_test.go deleted file mode 100644 index d03cd564f..000000000 --- a/pkg/sentry/fs/ext/extent_test.go +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "bytes" - "math/rand" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" -) - -const ( - // mockExtentBlkSize is the mock block size used for testing. - // No block has more than 1 header + 4 entries. - mockExtentBlkSize = uint64(64) -) - -// The tree described below looks like: -// -// 0.{Head}[Idx][Idx] -// / \ -// / \ -// 1.{Head}[Ext][Ext] 2.{Head}[Idx] -// / | \ -// [Phy] [Phy, Phy] 3.{Head}[Ext] -// | -// [Phy, Phy, Phy] -// -// Legend: -// - Head = ExtentHeader -// - Idx = ExtentIdx -// - Ext = Extent -// - Phy = Physical Block -// -// Please note that ext4 might not construct extent trees looking like this. -// This is purely for testing the tree traversal logic. -var ( - node3 = &disklayout.ExtentNode{ - Header: disklayout.ExtentHeader{ - Magic: disklayout.ExtentMagic, - NumEntries: 1, - MaxEntries: 4, - Height: 0, - }, - Entries: []disklayout.ExtentEntryPair{ - { - Entry: &disklayout.Extent{ - FirstFileBlock: 3, - Length: 3, - StartBlockLo: 6, - }, - Node: nil, - }, - }, - } - - node2 = &disklayout.ExtentNode{ - Header: disklayout.ExtentHeader{ - Magic: disklayout.ExtentMagic, - NumEntries: 1, - MaxEntries: 4, - Height: 1, - }, - Entries: []disklayout.ExtentEntryPair{ - { - Entry: &disklayout.ExtentIdx{ - FirstFileBlock: 3, - ChildBlockLo: 2, - }, - Node: node3, - }, - }, - } - - node1 = &disklayout.ExtentNode{ - Header: disklayout.ExtentHeader{ - Magic: disklayout.ExtentMagic, - NumEntries: 2, - MaxEntries: 4, - Height: 0, - }, - Entries: []disklayout.ExtentEntryPair{ - { - Entry: &disklayout.Extent{ - FirstFileBlock: 0, - Length: 1, - StartBlockLo: 3, - }, - Node: nil, - }, - { - Entry: &disklayout.Extent{ - FirstFileBlock: 1, - Length: 2, - StartBlockLo: 4, - }, - Node: nil, - }, - }, - } - - node0 = &disklayout.ExtentNode{ - Header: disklayout.ExtentHeader{ - Magic: disklayout.ExtentMagic, - NumEntries: 2, - MaxEntries: 4, - Height: 2, - }, - Entries: []disklayout.ExtentEntryPair{ - { - Entry: &disklayout.ExtentIdx{ - FirstFileBlock: 0, - ChildBlockLo: 0, - }, - Node: node1, - }, - { - Entry: &disklayout.ExtentIdx{ - FirstFileBlock: 3, - ChildBlockLo: 1, - }, - Node: node2, - }, - }, - } -) - -// TestExtentReader stress tests extentReader functionality. It performs random -// length reads from all possible positions in the extent tree. -func TestExtentReader(t *testing.T) { - mockExtentFile, want := extentTreeSetUp(t, node0) - n := len(want) - - for from := 0; from < n; from++ { - got := make([]byte, n-from) - - if read, err := mockExtentFile.ReadAt(got, int64(from)); err != nil { - t.Fatalf("file read operation from offset %d to %d only read %d bytes: %v", from, n, read, err) - } - - if diff := cmp.Diff(got, want[from:]); diff != "" { - t.Fatalf("file data from offset %d to %d mismatched (-want +got):\n%s", from, n, diff) - } - } -} - -// TestBuildExtentTree tests the extent tree building logic. -func TestBuildExtentTree(t *testing.T) { - mockExtentFile, _ := extentTreeSetUp(t, node0) - - opt := cmpopts.IgnoreUnexported(disklayout.ExtentIdx{}, disklayout.ExtentHeader{}) - if diff := cmp.Diff(&mockExtentFile.root, node0, opt); diff != "" { - t.Errorf("extent tree mismatch (-want +got):\n%s", diff) - } -} - -// extentTreeSetUp writes the passed extent tree to a mock disk as an extent -// tree. It also constucts a mock extent file with the same tree built in it. -// It also writes random data file data and returns it. -func extentTreeSetUp(t *testing.T, root *disklayout.ExtentNode) (*extentFile, []byte) { - t.Helper() - - mockDisk := make([]byte, mockExtentBlkSize*10) - mockExtentFile := &extentFile{ - regFile: regularFile{ - inode: inode{ - diskInode: &disklayout.InodeNew{ - InodeOld: disklayout.InodeOld{ - SizeLo: uint32(mockExtentBlkSize) * getNumPhyBlks(root), - }, - }, - blkSize: mockExtentBlkSize, - dev: bytes.NewReader(mockDisk), - }, - }, - } - - fileData := writeTree(&mockExtentFile.regFile.inode, mockDisk, node0, mockExtentBlkSize) - - if err := mockExtentFile.buildExtTree(); err != nil { - t.Fatalf("inode.buildExtTree failed: %v", err) - } - return mockExtentFile, fileData -} - -// writeTree writes the tree represented by `root` to the inode and disk. It -// also writes random file data on disk. -func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, mockExtentBlkSize uint64) []byte { - rootData := binary.Marshal(nil, binary.LittleEndian, root.Header) - for _, ep := range root.Entries { - rootData = binary.Marshal(rootData, binary.LittleEndian, ep.Entry) - } - - copy(in.diskInode.Data(), rootData) - - var fileData []byte - for _, ep := range root.Entries { - if root.Header.Height == 0 { - fileData = append(fileData, writeFileDataToExtent(disk, ep.Entry.(*disklayout.Extent))...) - } else { - fileData = append(fileData, writeTreeToDisk(disk, ep)...) - } - } - return fileData -} - -// writeTreeToDisk is the recursive step for writeTree which writes the tree -// on the disk only. Also writes random file data on disk. -func writeTreeToDisk(disk []byte, curNode disklayout.ExtentEntryPair) []byte { - nodeData := binary.Marshal(nil, binary.LittleEndian, curNode.Node.Header) - for _, ep := range curNode.Node.Entries { - nodeData = binary.Marshal(nodeData, binary.LittleEndian, ep.Entry) - } - - copy(disk[curNode.Entry.PhysicalBlock()*mockExtentBlkSize:], nodeData) - - var fileData []byte - for _, ep := range curNode.Node.Entries { - if curNode.Node.Header.Height == 0 { - fileData = append(fileData, writeFileDataToExtent(disk, ep.Entry.(*disklayout.Extent))...) - } else { - fileData = append(fileData, writeTreeToDisk(disk, ep)...) - } - } - return fileData -} - -// writeFileDataToExtent writes random bytes to the blocks on disk that the -// passed extent points to. -func writeFileDataToExtent(disk []byte, ex *disklayout.Extent) []byte { - phyExStartBlk := ex.PhysicalBlock() - phyExStartOff := phyExStartBlk * mockExtentBlkSize - phyExEndOff := phyExStartOff + uint64(ex.Length)*mockExtentBlkSize - rand.Read(disk[phyExStartOff:phyExEndOff]) - return disk[phyExStartOff:phyExEndOff] -} - -// getNumPhyBlks returns the number of physical blocks covered under the node. -func getNumPhyBlks(node *disklayout.ExtentNode) uint32 { - var res uint32 - for _, ep := range node.Entries { - if node.Header.Height == 0 { - res += uint32(ep.Entry.(*disklayout.Extent).Length) - } else { - res += getNumPhyBlks(ep.Node) - } - } - return res -} diff --git a/pkg/sentry/fs/ext/file_description.go b/pkg/sentry/fs/ext/file_description.go deleted file mode 100644 index d244cf1e7..000000000 --- a/pkg/sentry/fs/ext/file_description.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/waiter" -) - -// fileDescription is embedded by ext implementations of -// vfs.FileDescriptionImpl. -type fileDescription struct { - vfsfd vfs.FileDescription - - // flags is the same as vfs.OpenOptions.Flags which are passed to - // vfs.FilesystemImpl.OpenAt. - // TODO(b/134676337): syscalls like read(2), write(2), fchmod(2), fchown(2), - // fgetxattr(2), ioctl(2), mmap(2) should fail with EBADF if O_PATH is set. - // Only close(2), fstat(2), fstatfs(2) should work. - flags uint32 -} - -func (fd *fileDescription) filesystem() *filesystem { - return fd.vfsfd.VirtualDentry().Mount().Filesystem().Impl().(*filesystem) -} - -func (fd *fileDescription) inode() *inode { - return fd.vfsfd.VirtualDentry().Dentry().Impl().(*dentry).inode -} - -// OnClose implements vfs.FileDescriptionImpl.OnClose. -func (fd *fileDescription) OnClose() error { return nil } - -// StatusFlags implements vfs.FileDescriptionImpl.StatusFlags. -func (fd *fileDescription) StatusFlags(ctx context.Context) (uint32, error) { - return fd.flags, nil -} - -// SetStatusFlags implements vfs.FileDescriptionImpl.SetStatusFlags. -func (fd *fileDescription) SetStatusFlags(ctx context.Context, flags uint32) error { - // None of the flags settable by fcntl(F_SETFL) are supported, so this is a - // no-op. - return nil -} - -// Stat implements vfs.FileDescriptionImpl.Stat. -func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { - var stat linux.Statx - fd.inode().statTo(&stat) - return stat, nil -} - -// SetStat implements vfs.FileDescriptionImpl.SetStat. -func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - if opts.Stat.Mask == 0 { - return nil - } - return syserror.EPERM -} - -// SetStat implements vfs.FileDescriptionImpl.StatFS. -func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { - var stat linux.Statfs - fd.filesystem().statTo(&stat) - return stat, nil -} - -// Readiness implements waiter.Waitable.Readiness analogously to -// file_operations::poll == NULL in Linux. -func (fd *fileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { - // include/linux/poll.h:vfs_poll() => DEFAULT_POLLMASK - return waiter.EventIn | waiter.EventOut -} - -// EventRegister implements waiter.Waitable.EventRegister analogously to -// file_operations::poll == NULL in Linux. -func (fd *fileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) {} - -// EventUnregister implements waiter.Waitable.EventUnregister analogously to -// file_operations::poll == NULL in Linux. -func (fd *fileDescription) EventUnregister(e *waiter.Entry) {} - -// Sync implements vfs.FileDescriptionImpl.Sync. -func (fd *fileDescription) Sync(ctx context.Context) error { - return nil -} - -// Ioctl implements vfs.FileDescriptionImpl.Ioctl. -func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { - // ioctl(2) specifies that ENOTTY must be returned if the file descriptor is - // not associated with a character special device (which is unimplemented). - return 0, syserror.ENOTTY -} diff --git a/pkg/sentry/fs/ext/filesystem.go b/pkg/sentry/fs/ext/filesystem.go deleted file mode 100644 index e08839f48..000000000 --- a/pkg/sentry/fs/ext/filesystem.go +++ /dev/null @@ -1,443 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "errors" - "io" - "sync" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -var ( - // errResolveDirent indicates that the vfs.ResolvingPath.Component() does - // not exist on the dentry tree but does exist on disk. So it has to be read in - // using the in-memory dirent and added to the dentry tree. Usually indicates - // the need to lock filesystem.mu for writing. - errResolveDirent = errors.New("resolve path component using dirent") -) - -// filesystem implements vfs.FilesystemImpl. -type filesystem struct { - vfsfs vfs.Filesystem - - // mu serializes changes to the Dentry tree. - mu sync.RWMutex - - // dev represents the underlying fs device. It does not require protection - // because io.ReaderAt permits concurrent read calls to it. It translates to - // the pread syscall which passes on the read request directly to the device - // driver. Device drivers are intelligent in serving multiple concurrent read - // requests in the optimal order (taking locality into consideration). - dev io.ReaderAt - - // inodeCache maps absolute inode numbers to the corresponding Inode struct. - // Inodes should be removed from this once their reference count hits 0. - // - // Protected by mu because most additions (see IterDirents) and all removals - // from this corresponds to a change in the dentry tree. - inodeCache map[uint32]*inode - - // sb represents the filesystem superblock. Immutable after initialization. - sb disklayout.SuperBlock - - // bgs represents all the block group descriptors for the filesystem. - // Immutable after initialization. - bgs []disklayout.BlockGroup -} - -// Compiles only if filesystem implements vfs.FilesystemImpl. -var _ vfs.FilesystemImpl = (*filesystem)(nil) - -// stepLocked resolves rp.Component() in parent directory vfsd. The write -// parameter passed tells if the caller has acquired filesystem.mu for writing -// or not. If set to true, an existing inode on disk can be added to the dentry -// tree if not present already. -// -// stepLocked is loosely analogous to fs/namei.c:walk_component(). -// -// Preconditions: -// - filesystem.mu must be locked (for writing if write param is true). -// - !rp.Done(). -// - inode == vfsd.Impl().(*Dentry).inode. -func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write bool) (*vfs.Dentry, *inode, error) { - if !inode.isDir() { - return nil, nil, syserror.ENOTDIR - } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { - return nil, nil, err - } - - for { - nextVFSD, err := rp.ResolveComponent(vfsd) - if err != nil { - return nil, nil, err - } - if nextVFSD == nil { - // Since the Dentry tree is not the sole source of truth for extfs, if it's - // not in the Dentry tree, it might need to be pulled from disk. - childDirent, ok := inode.impl.(*directory).childMap[rp.Component()] - if !ok { - // The underlying inode does not exist on disk. - return nil, nil, syserror.ENOENT - } - - if !write { - // filesystem.mu must be held for writing to add to the dentry tree. - return nil, nil, errResolveDirent - } - - // Create and add the component's dirent to the dentry tree. - fs := rp.Mount().Filesystem().Impl().(*filesystem) - childInode, err := fs.getOrCreateInodeLocked(childDirent.diskDirent.Inode()) - if err != nil { - return nil, nil, err - } - // incRef because this is being added to the dentry tree. - childInode.incRef() - child := newDentry(childInode) - vfsd.InsertChild(&child.vfsd, rp.Component()) - - // Continue as usual now that nextVFSD is not nil. - nextVFSD = &child.vfsd - } - nextInode := nextVFSD.Impl().(*dentry).inode - if nextInode.isSymlink() && rp.ShouldFollowSymlink() { - if err := rp.HandleSymlink(inode.impl.(*symlink).target); err != nil { - return nil, nil, err - } - continue - } - rp.Advance() - return nextVFSD, nextInode, nil - } -} - -// walkLocked resolves rp to an existing file. The write parameter -// passed tells if the caller has acquired filesystem.mu for writing or not. -// If set to true, additions can be made to the dentry tree while walking. -// If errResolveDirent is returned, the walk needs to be continued with an -// upgraded filesystem.mu. -// -// walkLocked is loosely analogous to Linux's fs/namei.c:path_lookupat(). -// -// Preconditions: -// - filesystem.mu must be locked (for writing if write param is true). -func walkLocked(rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - for !rp.Done() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode, write) - if err != nil { - return nil, nil, err - } - } - if rp.MustBeDir() && !inode.isDir() { - return nil, nil, syserror.ENOTDIR - } - return vfsd, inode, nil -} - -// walkParentLocked resolves all but the last path component of rp to an -// existing directory. It does not check that the returned directory is -// searchable by the provider of rp. The write parameter passed tells if the -// caller has acquired filesystem.mu for writing or not. If set to true, -// additions can be made to the dentry tree while walking. -// If errResolveDirent is returned, the walk needs to be continued with an -// upgraded filesystem.mu. -// -// walkParentLocked is loosely analogous to Linux's fs/namei.c:path_parentat(). -// -// Preconditions: -// - filesystem.mu must be locked (for writing if write param is true). -// - !rp.Done(). -func walkParentLocked(rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - for !rp.Final() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode, write) - if err != nil { - return nil, nil, err - } - } - if !inode.isDir() { - return nil, nil, syserror.ENOTDIR - } - return vfsd, inode, nil -} - -// walk resolves rp to an existing file. If parent is set to true, it resolves -// the rp till the parent of the last component which should be an existing -// directory. If parent is false then resolves rp entirely. Attemps to resolve -// the path as far as it can with a read lock and upgrades the lock if needed. -func (fs *filesystem) walk(rp *vfs.ResolvingPath, parent bool) (*vfs.Dentry, *inode, error) { - var ( - vfsd *vfs.Dentry - inode *inode - err error - ) - - // Try walking with the hopes that all dentries have already been pulled out - // of disk. This reduces congestion (allows concurrent walks). - fs.mu.RLock() - if parent { - vfsd, inode, err = walkParentLocked(rp, false) - } else { - vfsd, inode, err = walkLocked(rp, false) - } - fs.mu.RUnlock() - - if err == errResolveDirent { - // Upgrade lock and continue walking. Lock upgrading in the middle of the - // walk is fine as this is a read only filesystem. - fs.mu.Lock() - if parent { - vfsd, inode, err = walkParentLocked(rp, true) - } else { - vfsd, inode, err = walkLocked(rp, true) - } - fs.mu.Unlock() - } - - return vfsd, inode, err -} - -// getOrCreateInodeLocked gets the inode corresponding to the inode number passed in. -// It creates a new one with the given inode number if one does not exist. -// The caller must increment the ref count if adding this to the dentry tree. -// -// Precondition: must be holding fs.mu for writing. -func (fs *filesystem) getOrCreateInodeLocked(inodeNum uint32) (*inode, error) { - if in, ok := fs.inodeCache[inodeNum]; ok { - return in, nil - } - - in, err := newInode(fs, inodeNum) - if err != nil { - return nil, err - } - - fs.inodeCache[inodeNum] = in - return in, nil -} - -// statTo writes the statfs fields to the output parameter. -func (fs *filesystem) statTo(stat *linux.Statfs) { - stat.Type = uint64(fs.sb.Magic()) - stat.BlockSize = int64(fs.sb.BlockSize()) - stat.Blocks = fs.sb.BlocksCount() - stat.BlocksFree = fs.sb.FreeBlocksCount() - stat.BlocksAvailable = fs.sb.FreeBlocksCount() - stat.Files = uint64(fs.sb.InodesCount()) - stat.FilesFree = uint64(fs.sb.FreeInodesCount()) - stat.NameLength = disklayout.MaxFileName - stat.FragmentSize = int64(fs.sb.BlockSize()) - // TODO(b/134676337): Set Statfs.Flags and Statfs.FSID. -} - -// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. -func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { - vfsd, inode, err := fs.walk(rp, false) - if err != nil { - return nil, err - } - - if opts.CheckSearchable { - if !inode.isDir() { - return nil, syserror.ENOTDIR - } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { - return nil, err - } - } - - inode.incRef() - return vfsd, nil -} - -// OpenAt implements vfs.FilesystemImpl.OpenAt. -func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - vfsd, inode, err := fs.walk(rp, false) - if err != nil { - return nil, err - } - - // EROFS is returned if write access is needed. - if vfs.MayWriteFileWithOpenFlags(opts.Flags) || opts.Flags&(linux.O_CREAT|linux.O_EXCL|linux.O_TMPFILE) != 0 { - return nil, syserror.EROFS - } - return inode.open(rp, vfsd, opts.Flags) -} - -// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. -func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { - _, inode, err := fs.walk(rp, false) - if err != nil { - return "", err - } - symlink, ok := inode.impl.(*symlink) - if !ok { - return "", syserror.EINVAL - } - return symlink.target, nil -} - -// StatAt implements vfs.FilesystemImpl.StatAt. -func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { - _, inode, err := fs.walk(rp, false) - if err != nil { - return linux.Statx{}, err - } - var stat linux.Statx - inode.statTo(&stat) - return stat, nil -} - -// StatFSAt implements vfs.FilesystemImpl.StatFSAt. -func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { - if _, _, err := fs.walk(rp, false); err != nil { - return linux.Statfs{}, err - } - - var stat linux.Statfs - fs.statTo(&stat) - return stat, nil -} - -// Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() {} - -// Sync implements vfs.FilesystemImpl.Sync. -func (fs *filesystem) Sync(ctx context.Context) error { - // This is a readonly filesystem for now. - return nil -} - -// The vfs.FilesystemImpl functions below return EROFS because their respective -// man pages say that EROFS must be returned if the path resolves to a file on -// this read-only filesystem. - -// LinkAt implements vfs.FilesystemImpl.LinkAt. -func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { - if rp.Done() { - return syserror.EEXIST - } - - if _, _, err := fs.walk(rp, true); err != nil { - return err - } - - return syserror.EROFS -} - -// MkdirAt implements vfs.FilesystemImpl.MkdirAt. -func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { - if rp.Done() { - return syserror.EEXIST - } - - if _, _, err := fs.walk(rp, true); err != nil { - return err - } - - return syserror.EROFS -} - -// MknodAt implements vfs.FilesystemImpl.MknodAt. -func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { - if rp.Done() { - return syserror.EEXIST - } - - _, _, err := fs.walk(rp, true) - if err != nil { - return err - } - - return syserror.EROFS -} - -// RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error { - if rp.Done() { - return syserror.ENOENT - } - - _, _, err := fs.walk(rp, false) - if err != nil { - return err - } - - return syserror.EROFS -} - -// RmdirAt implements vfs.FilesystemImpl.RmdirAt. -func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { - _, inode, err := fs.walk(rp, false) - if err != nil { - return err - } - - if !inode.isDir() { - return syserror.ENOTDIR - } - - return syserror.EROFS -} - -// SetStatAt implements vfs.FilesystemImpl.SetStatAt. -func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { - _, _, err := fs.walk(rp, false) - if err != nil { - return err - } - - return syserror.EROFS -} - -// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. -func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { - if rp.Done() { - return syserror.EEXIST - } - - _, _, err := fs.walk(rp, true) - if err != nil { - return err - } - - return syserror.EROFS -} - -// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. -func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { - _, inode, err := fs.walk(rp, false) - if err != nil { - return err - } - - if inode.isDir() { - return syserror.EISDIR - } - - return syserror.EROFS -} diff --git a/pkg/sentry/fs/ext/inode.go b/pkg/sentry/fs/ext/inode.go deleted file mode 100644 index 178bd6376..000000000 --- a/pkg/sentry/fs/ext/inode.go +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "fmt" - "io" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// inode represents an ext inode. -// -// inode uses the same inheritance pattern that pkg/sentry/vfs structures use. -// This has been done to increase memory locality. -// -// Implementations: -// inode -- -// |-- dir -// |-- symlink -// |-- regular-- -// |-- extent file -// |-- block map file -type inode struct { - // refs is a reference count. refs is accessed using atomic memory operations. - refs int64 - - // inodeNum is the inode number of this inode on disk. This is used to - // identify inodes within the ext filesystem. - inodeNum uint32 - - // dev represents the underlying device. Same as filesystem.dev. - dev io.ReaderAt - - // blkSize is the fs data block size. Same as filesystem.sb.BlockSize(). - blkSize uint64 - - // diskInode gives us access to the inode struct on disk. Immutable. - diskInode disklayout.Inode - - // This is immutable. The first field of the implementations must have inode - // as the first field to ensure temporality. - impl interface{} -} - -// incRef increments the inode ref count. -func (in *inode) incRef() { - atomic.AddInt64(&in.refs, 1) -} - -// tryIncRef tries to increment the ref count. Returns true if successful. -func (in *inode) tryIncRef() bool { - for { - refs := atomic.LoadInt64(&in.refs) - if refs == 0 { - return false - } - if atomic.CompareAndSwapInt64(&in.refs, refs, refs+1) { - return true - } - } -} - -// decRef decrements the inode ref count and releases the inode resources if -// the ref count hits 0. -// -// Precondition: Must have locked fs.mu. -func (in *inode) decRef(fs *filesystem) { - if refs := atomic.AddInt64(&in.refs, -1); refs == 0 { - delete(fs.inodeCache, in.inodeNum) - } else if refs < 0 { - panic("ext.inode.decRef() called without holding a reference") - } -} - -// newInode is the inode constructor. Reads the inode off disk. Identifies -// inodes based on the absolute inode number on disk. -func newInode(fs *filesystem, inodeNum uint32) (*inode, error) { - if inodeNum == 0 { - panic("inode number 0 on ext filesystems is not possible") - } - - inodeRecordSize := fs.sb.InodeSize() - var diskInode disklayout.Inode - if inodeRecordSize == disklayout.OldInodeSize { - diskInode = &disklayout.InodeOld{} - } else { - diskInode = &disklayout.InodeNew{} - } - - // Calculate where the inode is actually placed. - inodesPerGrp := fs.sb.InodesPerGroup() - blkSize := fs.sb.BlockSize() - inodeTableOff := fs.bgs[getBGNum(inodeNum, inodesPerGrp)].InodeTable() * blkSize - inodeOff := inodeTableOff + uint64(uint32(inodeRecordSize)*getBGOff(inodeNum, inodesPerGrp)) - - if err := readFromDisk(fs.dev, int64(inodeOff), diskInode); err != nil { - return nil, err - } - - // Build the inode based on its type. - inode := inode{ - inodeNum: inodeNum, - dev: fs.dev, - blkSize: blkSize, - diskInode: diskInode, - } - - switch diskInode.Mode().FileType() { - case linux.ModeSymlink: - f, err := newSymlink(inode) - if err != nil { - return nil, err - } - return &f.inode, nil - case linux.ModeRegular: - f, err := newRegularFile(inode) - if err != nil { - return nil, err - } - return &f.inode, nil - case linux.ModeDirectory: - f, err := newDirectroy(inode, fs.sb.IncompatibleFeatures().DirentFileType) - if err != nil { - return nil, err - } - return &f.inode, nil - default: - // TODO(b/134676337): Return appropriate errors for sockets, pipes and devices. - return nil, syserror.EINVAL - } -} - -// open creates and returns a file description for the dentry passed in. -func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { - ats := vfs.AccessTypesForOpenFlags(flags) - if err := in.checkPermissions(rp.Credentials(), ats); err != nil { - return nil, err - } - switch in.impl.(type) { - case *regularFile: - var fd regularFileFD - fd.flags = flags - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) - return &fd.vfsfd, nil - case *directory: - // Can't open directories writably. This check is not necessary for a read - // only filesystem but will be required when write is implemented. - if ats&vfs.MayWrite != 0 { - return nil, syserror.EISDIR - } - var fd directoryFD - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) - fd.flags = flags - return &fd.vfsfd, nil - case *symlink: - if flags&linux.O_PATH == 0 { - // Can't open symlinks without O_PATH. - return nil, syserror.ELOOP - } - var fd symlinkFD - fd.flags = flags - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) - return &fd.vfsfd, nil - default: - panic(fmt.Sprintf("unknown inode type: %T", in.impl)) - } -} - -func (in *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error { - return vfs.GenericCheckPermissions(creds, ats, in.isDir(), uint16(in.diskInode.Mode()), in.diskInode.UID(), in.diskInode.GID()) -} - -// statTo writes the statx fields to the output parameter. -func (in *inode) statTo(stat *linux.Statx) { - stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | - linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_SIZE | - linux.STATX_ATIME | linux.STATX_CTIME | linux.STATX_MTIME - stat.Blksize = uint32(in.blkSize) - stat.Mode = uint16(in.diskInode.Mode()) - stat.Nlink = uint32(in.diskInode.LinksCount()) - stat.UID = uint32(in.diskInode.UID()) - stat.GID = uint32(in.diskInode.GID()) - stat.Ino = uint64(in.inodeNum) - stat.Size = in.diskInode.Size() - stat.Atime = in.diskInode.AccessTime().StatxTimestamp() - stat.Ctime = in.diskInode.ChangeTime().StatxTimestamp() - stat.Mtime = in.diskInode.ModificationTime().StatxTimestamp() - // TODO(b/134676337): Set stat.Blocks which is the number of 512 byte blocks - // (including metadata blocks) required to represent this file. -} - -// getBGNum returns the block group number that a given inode belongs to. -func getBGNum(inodeNum uint32, inodesPerGrp uint32) uint32 { - return (inodeNum - 1) / inodesPerGrp -} - -// getBGOff returns the offset at which the given inode lives in the block -// group's inode table, i.e. the index of the inode in the inode table. -func getBGOff(inodeNum uint32, inodesPerGrp uint32) uint32 { - return (inodeNum - 1) % inodesPerGrp -} diff --git a/pkg/sentry/fs/ext/regular_file.go b/pkg/sentry/fs/ext/regular_file.go deleted file mode 100644 index ffc76ba5b..000000000 --- a/pkg/sentry/fs/ext/regular_file.go +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "io" - "sync" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// regularFile represents a regular file's inode. This too follows the -// inheritance pattern prevelant in the vfs layer described in -// pkg/sentry/vfs/README.md. -type regularFile struct { - inode inode - - // This is immutable. The first field of fileReader implementations must be - // regularFile to ensure temporality. - // io.ReaderAt is more strict than io.Reader in the sense that a partial read - // is always accompanied by an error. If a read spans past the end of file, a - // partial read (within file range) is done and io.EOF is returned. - impl io.ReaderAt -} - -// newRegularFile is the regularFile constructor. It figures out what kind of -// file this is and initializes the fileReader. -func newRegularFile(inode inode) (*regularFile, error) { - regFile := regularFile{ - inode: inode, - } - - inodeFlags := inode.diskInode.Flags() - - if inodeFlags.Extents { - file, err := newExtentFile(regFile) - if err != nil { - return nil, err - } - - file.regFile.inode.impl = &file.regFile - return &file.regFile, nil - } - - file, err := newBlockMapFile(regFile) - if err != nil { - return nil, err - } - file.regFile.inode.impl = &file.regFile - return &file.regFile, nil -} - -func (in *inode) isRegular() bool { - _, ok := in.impl.(*regularFile) - return ok -} - -// directoryFD represents a directory file description. It implements -// vfs.FileDescriptionImpl. -type regularFileFD struct { - fileDescription - - // off is the file offset. off is accessed using atomic memory operations. - off int64 - - // offMu serializes operations that may mutate off. - offMu sync.Mutex -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *regularFileFD) Release() {} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - safeReader := safemem.FromIOReaderAt{ - ReaderAt: fd.inode().impl.(*regularFile).impl, - Offset: offset, - } - - // Copies data from disk directly into usermem without any intermediate - // allocations (if dst is converted into BlockSeq such that it does not need - // safe copying). - return dst.CopyOutFrom(ctx, safeReader) -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - n, err := fd.PRead(ctx, dst, fd.off, opts) - fd.offMu.Lock() - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - // write(2) specifies that EBADF must be returned if the fd is not open for - // writing. - return 0, syserror.EBADF -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - n, err := fd.PWrite(ctx, src, fd.off, opts) - fd.offMu.Lock() - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *regularFileFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - return syserror.ENOTDIR -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fd.offMu.Lock() - defer fd.offMu.Unlock() - switch whence { - case linux.SEEK_SET: - // Use offset as specified. - case linux.SEEK_CUR: - offset += fd.off - case linux.SEEK_END: - offset += int64(fd.inode().diskInode.Size()) - default: - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - fd.off = offset - return offset, nil -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts memmap.MMapOpts) error { - // TODO(b/134676337): Implement mmap(2). - return syserror.ENODEV -} diff --git a/pkg/sentry/fs/ext/symlink.go b/pkg/sentry/fs/ext/symlink.go deleted file mode 100644 index e06548a98..000000000 --- a/pkg/sentry/fs/ext/symlink.go +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// symlink represents a symlink inode. -type symlink struct { - inode inode - target string // immutable -} - -// newSymlink is the symlink constructor. It reads out the symlink target from -// the inode (however it might have been stored). -func newSymlink(inode inode) (*symlink, error) { - var file *symlink - var link []byte - - // If the symlink target is lesser than 60 bytes, its stores in inode.Data(). - // Otherwise either extents or block maps will be used to store the link. - size := inode.diskInode.Size() - if size < 60 { - link = inode.diskInode.Data()[:size] - } else { - // Create a regular file out of this inode and read out the target. - regFile, err := newRegularFile(inode) - if err != nil { - return nil, err - } - - link = make([]byte, size) - if n, err := regFile.impl.ReadAt(link, 0); uint64(n) < size { - return nil, err - } - } - - file = &symlink{inode: inode, target: string(link)} - file.inode.impl = file - return file, nil -} - -func (in *inode) isSymlink() bool { - _, ok := in.impl.(*symlink) - return ok -} - -// symlinkFD represents a symlink file description and implements implements -// vfs.FileDescriptionImpl. which may only be used if open options contains -// O_PATH. For this reason most of the functions return EBADF. -type symlinkFD struct { - fileDescription -} - -// Compiles only if symlinkFD implements vfs.FileDescriptionImpl. -var _ vfs.FileDescriptionImpl = (*symlinkFD)(nil) - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *symlinkFD) Release() {} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *symlinkFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.EBADF -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *symlinkFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.EBADF -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *symlinkFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.EBADF -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *symlinkFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.EBADF -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *symlinkFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - return syserror.ENOTDIR -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *symlinkFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - return 0, syserror.EBADF -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *symlinkFD) ConfigureMMap(ctx context.Context, opts memmap.MMapOpts) error { - return syserror.EBADF -} diff --git a/pkg/sentry/fs/ext/utils.go b/pkg/sentry/fs/ext/utils.go deleted file mode 100644 index 3d89d664d..000000000 --- a/pkg/sentry/fs/ext/utils.go +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "io" - - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" - "gvisor.dev/gvisor/pkg/syserror" -) - -// readFromDisk performs a binary read from disk into the given struct from -// the absolute offset provided. -func readFromDisk(dev io.ReaderAt, abOff int64, v interface{}) error { - n := binary.Size(v) - buf := make([]byte, n) - if read, _ := dev.ReadAt(buf, abOff); read < int(n) { - return syserror.EIO - } - - binary.Unmarshal(buf, binary.LittleEndian, v) - return nil -} - -// readSuperBlock reads the SuperBlock from block group 0 in the underlying -// device. There are three versions of the superblock. This function identifies -// and returns the correct version. -func readSuperBlock(dev io.ReaderAt) (disklayout.SuperBlock, error) { - var sb disklayout.SuperBlock = &disklayout.SuperBlockOld{} - if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil { - return nil, err - } - if sb.Revision() == disklayout.OldRev { - return sb, nil - } - - sb = &disklayout.SuperBlock32Bit{} - if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil { - return nil, err - } - if !sb.IncompatibleFeatures().Is64Bit { - return sb, nil - } - - sb = &disklayout.SuperBlock64Bit{} - if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil { - return nil, err - } - return sb, nil -} - -// blockGroupsCount returns the number of block groups in the ext fs. -func blockGroupsCount(sb disklayout.SuperBlock) uint64 { - blocksCount := sb.BlocksCount() - blocksPerGroup := uint64(sb.BlocksPerGroup()) - - // Round up the result. float64 can compromise precision so do it manually. - return (blocksCount + blocksPerGroup - 1) / blocksPerGroup -} - -// readBlockGroups reads the block group descriptor table from block group 0 in -// the underlying device. -func readBlockGroups(dev io.ReaderAt, sb disklayout.SuperBlock) ([]disklayout.BlockGroup, error) { - bgCount := blockGroupsCount(sb) - bgdSize := uint64(sb.BgDescSize()) - is64Bit := sb.IncompatibleFeatures().Is64Bit - bgds := make([]disklayout.BlockGroup, bgCount) - - for i, off := uint64(0), uint64(sb.FirstDataBlock()+1)*sb.BlockSize(); i < bgCount; i, off = i+1, off+bgdSize { - if is64Bit { - bgds[i] = &disklayout.BlockGroup64Bit{} - } else { - bgds[i] = &disklayout.BlockGroup32Bit{} - } - - if err := readFromDisk(dev, int64(off), bgds[i]); err != nil { - return nil, err - } - } - return bgds, nil -} diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD deleted file mode 100644 index bf00b9c09..000000000 --- a/pkg/sentry/fs/fdpipe/BUILD +++ /dev/null @@ -1,48 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "fdpipe", - srcs = [ - "pipe.go", - "pipe_opener.go", - "pipe_state.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/fdpipe", - imports = ["gvisor.dev/gvisor/pkg/sentry/fs"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/log", - "//pkg/secio", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/safemem", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "fdpipe_test", - size = "small", - srcs = [ - "pipe_opener_test.go", - "pipe_test.go", - ], - embed = [":fdpipe"], - deps = [ - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/usermem", - "//pkg/syserror", - "@com_github_google_uuid//:go_default_library", - ], -) diff --git a/pkg/sentry/fs/fdpipe/fdpipe_state_autogen.go b/pkg/sentry/fs/fdpipe/fdpipe_state_autogen.go new file mode 100755 index 000000000..38c1ed916 --- /dev/null +++ b/pkg/sentry/fs/fdpipe/fdpipe_state_autogen.go @@ -0,0 +1,27 @@ +// automatically generated by stateify. + +package fdpipe + +import ( + "gvisor.dev/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/sentry/fs" +) + +func (x *pipeOperations) save(m state.Map) { + x.beforeSave() + var flags fs.FileFlags = x.saveFlags() + m.SaveValue("flags", flags) + m.Save("opener", &x.opener) + m.Save("readAheadBuffer", &x.readAheadBuffer) +} + +func (x *pipeOperations) load(m state.Map) { + m.LoadWait("opener", &x.opener) + m.Load("readAheadBuffer", &x.readAheadBuffer) + m.LoadValue("flags", new(fs.FileFlags), func(y interface{}) { x.loadFlags(y.(fs.FileFlags)) }) + m.AfterLoad(x.afterLoad) +} + +func init() { + state.Register("fdpipe.pipeOperations", (*pipeOperations)(nil), state.Fns{Save: (*pipeOperations).save, Load: (*pipeOperations).load}) +} diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go deleted file mode 100644 index 8e4d839e1..000000000 --- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go +++ /dev/null @@ -1,522 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fdpipe - -import ( - "bytes" - "fmt" - "io" - "os" - "path" - "syscall" - "testing" - "time" - - "github.com/google/uuid" - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserror" -) - -type hostOpener struct { - name string -} - -func (h *hostOpener) NonBlockingOpen(_ context.Context, p fs.PermMask) (*fd.FD, error) { - var flags int - switch { - case p.Read && p.Write: - flags = syscall.O_RDWR - case p.Write: - flags = syscall.O_WRONLY - case p.Read: - flags = syscall.O_RDONLY - default: - return nil, syscall.EINVAL - } - f, err := syscall.Open(h.name, flags|syscall.O_NONBLOCK, 0666) - if err != nil { - return nil, err - } - return fd.New(f), nil -} - -func pipename() string { - return fmt.Sprintf(path.Join(os.TempDir(), "test-named-pipe-%s"), uuid.New()) -} - -func mkpipe(name string) error { - return syscall.Mknod(name, syscall.S_IFIFO|0666, 0) -} - -func TestTryOpen(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // makePipe is true if the test case should create the pipe. - makePipe bool - - // flags are the fs.FileFlags used to open the pipe. - flags fs.FileFlags - - // expectFile is true if a fs.File is expected. - expectFile bool - - // err is the expected error - err error - }{ - { - desc: "FileFlags lacking Read and Write are invalid", - makePipe: false, - flags: fs.FileFlags{}, /* bogus */ - expectFile: false, - err: syscall.EINVAL, - }, - { - desc: "NonBlocking Read only error returns immediately", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Read: true, NonBlocking: true}, - expectFile: false, - err: syscall.ENOENT, - }, - { - desc: "NonBlocking Read only success returns immediately", - makePipe: true, - flags: fs.FileFlags{Read: true, NonBlocking: true}, - expectFile: true, - err: nil, - }, - { - desc: "NonBlocking Write only error returns immediately", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Write: true, NonBlocking: true}, - expectFile: false, - err: syscall.ENOENT, - }, - { - desc: "NonBlocking Write only no reader error returns immediately", - makePipe: true, - flags: fs.FileFlags{Write: true, NonBlocking: true}, - expectFile: false, - err: syscall.ENXIO, - }, - { - desc: "ReadWrite error returns immediately", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Read: true, Write: true}, - expectFile: false, - err: syscall.ENOENT, - }, - { - desc: "ReadWrite returns immediately", - makePipe: true, - flags: fs.FileFlags{Read: true, Write: true}, - expectFile: true, - err: nil, - }, - { - desc: "Blocking Write only returns open error", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Write: true}, - expectFile: false, - err: syscall.ENOENT, /* from bogus perms */ - }, - { - desc: "Blocking Read only returns open error", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Read: true}, - expectFile: false, - err: syscall.ENOENT, - }, - { - desc: "Blocking Write only returns with syserror.ErrWouldBlock", - makePipe: true, - flags: fs.FileFlags{Write: true}, - expectFile: false, - err: syserror.ErrWouldBlock, - }, - { - desc: "Blocking Read only returns with syserror.ErrWouldBlock", - makePipe: true, - flags: fs.FileFlags{Read: true}, - expectFile: false, - err: syserror.ErrWouldBlock, - }, - } { - name := pipename() - if test.makePipe { - // Create the pipe. We do this per-test case to keep tests independent. - if err := mkpipe(name); err != nil { - t.Errorf("%s: failed to make host pipe: %v", test.desc, err) - continue - } - defer syscall.Unlink(name) - } - - // Use a host opener to keep things simple. - opener := &hostOpener{name: name} - - pipeOpenState := &pipeOpenState{} - ctx := contexttest.Context(t) - pipeOps, err := pipeOpenState.TryOpen(ctx, opener, test.flags) - if unwrapError(err) != test.err { - t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) - if pipeOps != nil { - // Cleanup the state of the pipe, and remove the fd from the - // fdnotifier. Sadly this needed to maintain the correctness - // of other tests because the fdnotifier is global. - pipeOps.Release() - } - continue - } - if (pipeOps != nil) != test.expectFile { - t.Errorf("%s: got non-nil file %v, want %v", test.desc, pipeOps != nil, test.expectFile) - } - if pipeOps != nil { - // Same as above. - pipeOps.Release() - } - } -} - -func TestPipeOpenUnblocksEventually(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // partnerIsReader is true if the goroutine opening the same pipe as the test case - // should open the pipe read only. Otherwise write only. This also means that the - // test case will open the pipe in the opposite way. - partnerIsReader bool - - // partnerIsBlocking is true if the goroutine opening the same pipe as the test case - // should do so without the O_NONBLOCK flag, otherwise opens the pipe with O_NONBLOCK - // until ENXIO is not returned. - partnerIsBlocking bool - }{ - { - desc: "Blocking Read with blocking writer partner opens eventually", - partnerIsReader: false, - partnerIsBlocking: true, - }, - { - desc: "Blocking Write with blocking reader partner opens eventually", - partnerIsReader: true, - partnerIsBlocking: true, - }, - { - desc: "Blocking Read with non-blocking writer partner opens eventually", - partnerIsReader: false, - partnerIsBlocking: false, - }, - { - desc: "Blocking Write with non-blocking reader partner opens eventually", - partnerIsReader: true, - partnerIsBlocking: false, - }, - } { - // Create the pipe. We do this per-test case to keep tests independent. - name := pipename() - if err := mkpipe(name); err != nil { - t.Errorf("%s: failed to make host pipe: %v", test.desc, err) - continue - } - defer syscall.Unlink(name) - - // Spawn the partner. - type fderr struct { - fd int - err error - } - errch := make(chan fderr, 1) - go func() { - var flags int - if test.partnerIsReader { - flags = syscall.O_RDONLY - } else { - flags = syscall.O_WRONLY - } - if test.partnerIsBlocking { - fd, err := syscall.Open(name, flags, 0666) - errch <- fderr{fd: fd, err: err} - } else { - var fd int - err := error(syscall.ENXIO) - for err == syscall.ENXIO { - fd, err = syscall.Open(name, flags|syscall.O_NONBLOCK, 0666) - time.Sleep(1 * time.Second) - } - errch <- fderr{fd: fd, err: err} - } - }() - - // Setup file flags for either a read only or write only open. - flags := fs.FileFlags{ - Read: !test.partnerIsReader, - Write: test.partnerIsReader, - } - - // Open the pipe in a blocking way, which should succeed eventually. - opener := &hostOpener{name: name} - ctx := contexttest.Context(t) - pipeOps, err := Open(ctx, opener, flags) - if pipeOps != nil { - // Same as TestTryOpen. - pipeOps.Release() - } - - // Check that the partner opened the file successfully. - e := <-errch - if e.err != nil { - t.Errorf("%s: partner got error %v, wanted nil", test.desc, e.err) - continue - } - // If so, then close the partner fd to avoid leaking an fd. - syscall.Close(e.fd) - - // Check that our blocking open was successful. - if err != nil { - t.Errorf("%s: blocking open got error %v, wanted nil", test.desc, err) - continue - } - if pipeOps == nil { - t.Errorf("%s: blocking open got nil file, wanted non-nil", test.desc) - continue - } - } -} - -func TestCopiedReadAheadBuffer(t *testing.T) { - // Create the pipe. - name := pipename() - if err := mkpipe(name); err != nil { - t.Fatalf("failed to make host pipe: %v", err) - } - defer syscall.Unlink(name) - - // We're taking advantage of the fact that pipes opened read only always return - // success, but internally they are not deemed "opened" until we're sure that - // another writer comes along. This means we can open the same pipe write only - // with no problems + write to it, given that opener.Open already tried to open - // the pipe RDONLY and succeeded, which we know happened if TryOpen returns - // syserror.ErrwouldBlock. - // - // This simulates the open(RDONLY) <-> open(WRONLY)+write race we care about, but - // does not cause our test to be racy (which would be terrible). - opener := &hostOpener{name: name} - pipeOpenState := &pipeOpenState{} - ctx := contexttest.Context(t) - pipeOps, err := pipeOpenState.TryOpen(ctx, opener, fs.FileFlags{Read: true}) - if pipeOps != nil { - pipeOps.Release() - t.Fatalf("open(%s, %o) got file, want nil", name, syscall.O_RDONLY) - } - if err != syserror.ErrWouldBlock { - t.Fatalf("open(%s, %o) got error %v, want %v", name, syscall.O_RDONLY, err, syserror.ErrWouldBlock) - } - - // Then open the same pipe write only and write some bytes to it. The next - // time we try to open the pipe read only again via the pipeOpenState, we should - // succeed and buffer some of the bytes written. - fd, err := syscall.Open(name, syscall.O_WRONLY, 0666) - if err != nil { - t.Fatalf("open(%s, %o) got error %v, want nil", name, syscall.O_WRONLY, err) - } - defer syscall.Close(fd) - - data := []byte("hello") - if n, err := syscall.Write(fd, data); n != len(data) || err != nil { - t.Fatalf("write(%v) got (%d, %v), want (%d, nil)", data, n, err, len(data)) - } - - // Try the read again, knowing that it should succeed this time. - pipeOps, err = pipeOpenState.TryOpen(ctx, opener, fs.FileFlags{Read: true}) - if pipeOps == nil { - t.Fatalf("open(%s, %o) got nil file, want not nil", name, syscall.O_RDONLY) - } - defer pipeOps.Release() - - if err != nil { - t.Fatalf("open(%s, %o) got error %v, want nil", name, syscall.O_RDONLY, err) - } - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, pipeOps) - - // Check that the file we opened points to a pipe with a non-empty read ahead buffer. - bufsize := len(pipeOps.readAheadBuffer) - if bufsize != 1 { - t.Fatalf("read ahead buffer got %d bytes, want %d", bufsize, 1) - } - - // Now for the final test, try to read everything in, expecting to get back all of - // the bytes that were written at once. Note that in the wild there is no atomic - // read size so expecting to get all bytes from a single writer when there are - // multiple readers is a bad expectation. - buf := make([]byte, len(data)) - ioseq := usermem.BytesIOSequence(buf) - n, err := pipeOps.Read(ctx, file, ioseq, 0) - if err != nil { - t.Fatalf("read request got error %v, want nil", err) - } - if n != int64(len(data)) { - t.Fatalf("read request got %d bytes, want %d", n, len(data)) - } - if !bytes.Equal(buf, data) { - t.Errorf("read request got bytes [%v], want [%v]", buf, data) - } -} - -func TestPipeHangup(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // flags control how we open our end of the pipe and must be read - // only or write only. They also dicate how a coordinating partner - // fd is opened, which is their inverse (read only -> write only, etc). - flags fs.FileFlags - - // hangupSelf if true causes the test case to close our end of the pipe - // and causes hangup errors to be asserted on our coordinating partner's - // fd. If hangupSelf is false, then our partner's fd is closed and the - // hangup errors are expected on our end of the pipe. - hangupSelf bool - }{ - { - desc: "Read only gets hangup error", - flags: fs.FileFlags{Read: true}, - }, - { - desc: "Write only gets hangup error", - flags: fs.FileFlags{Write: true}, - }, - { - desc: "Read only generates hangup error", - flags: fs.FileFlags{Read: true}, - hangupSelf: true, - }, - { - desc: "Write only generates hangup error", - flags: fs.FileFlags{Write: true}, - hangupSelf: true, - }, - } { - if test.flags.Read == test.flags.Write { - t.Errorf("%s: test requires a single reader or writer", test.desc) - continue - } - - // Create the pipe. We do this per-test case to keep tests independent. - name := pipename() - if err := mkpipe(name); err != nil { - t.Errorf("%s: failed to make host pipe: %v", test.desc, err) - continue - } - defer syscall.Unlink(name) - - // Fire off a partner routine which tries to open the same pipe blocking, - // which will synchronize with us. The channel allows us to get back the - // fd once we expect this partner routine to succeed, so we can manifest - // hangup events more directly. - fdchan := make(chan int, 1) - go func() { - // Be explicit about the flags to protect the test from - // misconfiguration. - var flags int - if test.flags.Read { - flags = syscall.O_WRONLY - } else { - flags = syscall.O_RDONLY - } - fd, err := syscall.Open(name, flags, 0666) - if err != nil { - t.Logf("Open(%q, %o, 0666) partner failed: %v", name, flags, err) - } - fdchan <- fd - }() - - // Open our end in a blocking way to ensure that we coordinate. - opener := &hostOpener{name: name} - ctx := contexttest.Context(t) - pipeOps, err := Open(ctx, opener, test.flags) - if err != nil { - t.Errorf("%s: Open got error %v, want nil", test.desc, err) - continue - } - // Don't defer file.DecRef here because that causes the hangup we're - // trying to test for. - - // Expect the partner routine to have coordinated with us and get back - // its open fd. - f := <-fdchan - if f < 0 { - t.Errorf("%s: partner routine got fd %d, want > 0", test.desc, f) - pipeOps.Release() - continue - } - - if test.hangupSelf { - // Hangup self and assert that our partner got the expected hangup - // error. - pipeOps.Release() - - if test.flags.Read { - // Partner is writer. - assertWriterHungup(t, test.desc, fd.NewReadWriter(f)) - } else { - // Partner is reader. - assertReaderHungup(t, test.desc, fd.NewReadWriter(f)) - } - } else { - // Hangup our partner and expect us to get the hangup error. - syscall.Close(f) - defer pipeOps.Release() - - if test.flags.Read { - assertReaderHungup(t, test.desc, pipeOps.(*pipeOperations).file) - } else { - assertWriterHungup(t, test.desc, pipeOps.(*pipeOperations).file) - } - } - } -} - -func assertReaderHungup(t *testing.T, desc string, reader io.Reader) bool { - // Drain the pipe completely, it might have crap in it, but expect EOF eventually. - var err error - for err == nil { - _, err = reader.Read(make([]byte, 10)) - } - if err != io.EOF { - t.Errorf("%s: read from self after hangup got error %v, want %v", desc, err, io.EOF) - return false - } - return true -} - -func assertWriterHungup(t *testing.T, desc string, writer io.Writer) bool { - if _, err := writer.Write([]byte("hello")); unwrapError(err) != syscall.EPIPE { - t.Errorf("%s: write to self after hangup got error %v, want %v", desc, err, syscall.EPIPE) - return false - } - return true -} diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go deleted file mode 100644 index 69abc1e71..000000000 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ /dev/null @@ -1,505 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fdpipe - -import ( - "bytes" - "io" - "os" - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserror" -) - -func singlePipeFD() (int, error) { - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - return -1, err - } - syscall.Close(fds[1]) - return fds[0], nil -} - -func singleDirFD() (int, error) { - return syscall.Open(os.TempDir(), syscall.O_RDONLY, 0666) -} - -func mockPipeDirent(t *testing.T) *fs.Dirent { - ctx := contexttest.Context(t) - node := fs.NewMockInodeOperations(ctx) - node.UAttr = fs.UnstableAttr{ - Perms: fs.FilePermissions{ - User: fs.PermMask{Read: true, Write: true}, - }, - } - inode := fs.NewInode(ctx, node, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - BlockSize: usermem.PageSize, - }) - return fs.NewDirent(ctx, inode, "") -} - -func TestNewPipe(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // getfd generates the fd to pass to newPipeOperations. - getfd func() (int, error) - - // flags are the fs.FileFlags passed to newPipeOperations. - flags fs.FileFlags - - // readAheadBuffer is the buffer passed to newPipeOperations. - readAheadBuffer []byte - - // err is the expected error. - err error - }{ - { - desc: "Cannot make new pipe from bad fd", - getfd: func() (int, error) { return -1, nil }, - err: syscall.EINVAL, - }, - { - desc: "Cannot make new pipe from non-pipe fd", - getfd: singleDirFD, - err: syscall.EINVAL, - }, - { - desc: "Can make new pipe from pipe fd", - getfd: singlePipeFD, - flags: fs.FileFlags{Read: true}, - readAheadBuffer: []byte("hello"), - }, - } { - gfd, err := test.getfd() - if err != nil { - t.Errorf("%s: getfd got (%d, %v), want (fd, nil)", test.desc, gfd, err) - continue - } - f := fd.New(gfd) - - p, err := newPipeOperations(contexttest.Context(t), nil, test.flags, f, test.readAheadBuffer) - if p != nil { - // This is necessary to remove the fd from the global fd notifier. - defer p.Release() - } else { - // If there is no p to DecRef on, because newPipeOperations failed, then the - // file still needs to be closed. - defer f.Close() - } - - if err != test.err { - t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) - continue - } - // Check the state of the pipe given that it was successfully opened. - if err == nil { - if p == nil { - t.Errorf("%s: got nil pipe and nil error, want (pipe, nil)", test.desc) - continue - } - if flags := p.flags; test.flags != flags { - t.Errorf("%s: got file flags %s, want %s", test.desc, flags, test.flags) - continue - } - if len(test.readAheadBuffer) != len(p.readAheadBuffer) { - t.Errorf("%s: got read ahead buffer length %d, want %d", test.desc, len(p.readAheadBuffer), len(test.readAheadBuffer)) - continue - } - fileFlags, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(p.file.FD()), syscall.F_GETFL, 0) - if errno != 0 { - t.Errorf("%s: failed to get file flags for fd %d, got %v, want 0", test.desc, p.file.FD(), errno) - continue - } - if fileFlags&syscall.O_NONBLOCK == 0 { - t.Errorf("%s: pipe is blocking, expected non-blocking", test.desc) - continue - } - if !fdnotifier.HasFD(int32(f.FD())) { - t.Errorf("%s: pipe fd %d is not registered for events", test.desc, f.FD) - } - } - } -} - -func TestPipeDestruction(t *testing.T) { - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - f := fd.New(fds[0]) - - // We don't care about the other end, just use the read end. - syscall.Close(fds[1]) - - // Test the read end, but it doesn't really matter which. - p, err := newPipeOperations(contexttest.Context(t), nil, fs.FileFlags{Read: true}, f, nil) - if err != nil { - f.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - // Drop our only reference, which should trigger the destructor. - p.Release() - - if fdnotifier.HasFD(int32(fds[0])) { - t.Fatalf("after DecRef fdnotifier has fd %d, want no longer registered", fds[0]) - } - if p.file != nil { - t.Errorf("after DecRef got file, want nil") - } -} - -type Seek struct{} - -type ReadDir struct{} - -type Writev struct { - Src usermem.IOSequence -} - -type Readv struct { - Dst usermem.IOSequence -} - -type Fsync struct{} - -func TestPipeRequest(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // request to execute. - context interface{} - - // flags determines whether to use the read or write end - // of the pipe, for this test it can only be Read or Write. - flags fs.FileFlags - - // keepOpenPartner if false closes the other end of the pipe, - // otherwise this is delayed until the end of the test. - keepOpenPartner bool - - // expected error - err error - }{ - { - desc: "ReadDir on pipe returns ENOTDIR", - context: &ReadDir{}, - err: syscall.ENOTDIR, - }, - { - desc: "Fsync on pipe returns EINVAL", - context: &Fsync{}, - err: syscall.EINVAL, - }, - { - desc: "Seek on pipe returns ESPIPE", - context: &Seek{}, - err: syscall.ESPIPE, - }, - { - desc: "Readv on pipe from empty buffer returns nil", - context: &Readv{Dst: usermem.BytesIOSequence(nil)}, - flags: fs.FileFlags{Read: true}, - }, - { - desc: "Readv on pipe from non-empty buffer and closed partner returns EOF", - context: &Readv{Dst: usermem.BytesIOSequence(make([]byte, 10))}, - flags: fs.FileFlags{Read: true}, - err: io.EOF, - }, - { - desc: "Readv on pipe from non-empty buffer and open partner returns EWOULDBLOCK", - context: &Readv{Dst: usermem.BytesIOSequence(make([]byte, 10))}, - flags: fs.FileFlags{Read: true}, - keepOpenPartner: true, - err: syserror.ErrWouldBlock, - }, - { - desc: "Writev on pipe from empty buffer returns nil", - context: &Writev{Src: usermem.BytesIOSequence(nil)}, - flags: fs.FileFlags{Write: true}, - }, - { - desc: "Writev on pipe from non-empty buffer and closed partner returns EPIPE", - context: &Writev{Src: usermem.BytesIOSequence([]byte("hello"))}, - flags: fs.FileFlags{Write: true}, - err: syscall.EPIPE, - }, - { - desc: "Writev on pipe from non-empty buffer and open partner succeeds", - context: &Writev{Src: usermem.BytesIOSequence([]byte("hello"))}, - flags: fs.FileFlags{Write: true}, - keepOpenPartner: true, - }, - } { - if test.flags.Read && test.flags.Write { - panic("both read and write not supported for this test") - } - - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - t.Errorf("%s: failed to create pipes: got %v, want nil", test.desc, err) - continue - } - - // Configure the fd and partner fd based on the file flags. - testFd, partnerFd := fds[0], fds[1] - if test.flags.Write { - testFd, partnerFd = fds[1], fds[0] - } - - // Configure closing the fds. - if test.keepOpenPartner { - defer syscall.Close(partnerFd) - } else { - syscall.Close(partnerFd) - } - - // Create the pipe. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, test.flags, fd.New(testFd), nil) - if err != nil { - t.Fatalf("%s: newPipeOperations got error %v, want nil", test.desc, err) - } - defer p.Release() - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - // Issue request via the appropriate function. - switch c := test.context.(type) { - case *Seek: - _, err = p.Seek(ctx, file, 0, 0) - case *ReadDir: - _, err = p.Readdir(ctx, file, nil) - case *Readv: - _, err = p.Read(ctx, file, c.Dst, 0) - case *Writev: - _, err = p.Write(ctx, file, c.Src, 0) - case *Fsync: - err = p.Fsync(ctx, file, 0, fs.FileMaxOffset, fs.SyncAll) - default: - t.Errorf("%s: unknown request type %T", test.desc, test.context) - } - - if unwrapError(err) != test.err { - t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) - } - } -} - -func TestPipeReadAheadBuffer(t *testing.T) { - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - rfile := fd.New(fds[0]) - - // Eventually close the write end, which is not wrapped in a pipe object. - defer syscall.Close(fds[1]) - - // Write some bytes to this end. - data := []byte("world") - if n, err := syscall.Write(fds[1], data); n != len(data) || err != nil { - rfile.Close() - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(data)) - } - // Close the write end immediately, we don't care about it. - - buffered := []byte("hello ") - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Read: true}, rfile, buffered) - if err != nil { - rfile.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - defer p.Release() - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - // In total we expect to read data + buffered. - total := append(buffered, data...) - - buf := make([]byte, len(total)) - iov := usermem.BytesIOSequence(buf) - n, err := p.Read(contexttest.Context(t), file, iov, 0) - if err != nil { - t.Fatalf("read request got error %v, want nil", err) - } - if n != int64(len(total)) { - t.Fatalf("read request got %d bytes, want %d", n, len(total)) - } - if !bytes.Equal(buf, total) { - t.Errorf("read request got bytes [%v], want [%v]", buf, total) - } -} - -// This is very important for pipes in general because they can return -// EWOULDBLOCK and for those that block they must continue until they have read -// all of the data (and report it as such). -func TestPipeReadsAccumulate(t *testing.T) { - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - rfile := fd.New(fds[0]) - - // Eventually close the write end, it doesn't depend on a pipe object. - defer syscall.Close(fds[1]) - - // Get a new read only pipe reference. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Read: true}, rfile, nil) - if err != nil { - rfile.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - // Don't forget to remove the fd from the fd notifier. Otherwise other tests will - // likely be borked, because it's global :( - defer p.Release() - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - // Write some some bytes to the pipe. - data := []byte("some message") - if n, err := syscall.Write(fds[1], data); n != len(data) || err != nil { - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(data)) - } - - // Construct a segment vec that is a bit more than we have written so we - // trigger an EWOULDBLOCK. - wantBytes := len(data) + 1 - readBuffer := make([]byte, wantBytes) - iov := usermem.BytesIOSequence(readBuffer) - n, err := p.Read(ctx, file, iov, 0) - total := n - iov = iov.DropFirst64(n) - if err != syserror.ErrWouldBlock { - t.Fatalf("Readv got error %v, want %v", err, syserror.ErrWouldBlock) - } - - // Write a few more bytes to allow us to read more/accumulate. - extra := []byte("extra") - if n, err := syscall.Write(fds[1], extra); n != len(extra) || err != nil { - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(extra)) - } - - // This time, using the same request, we should not block. - n, err = p.Read(ctx, file, iov, 0) - total += n - if err != nil { - t.Fatalf("Readv got error %v, want nil", err) - } - - // Assert that the result we got back is cumulative. - if total != int64(wantBytes) { - t.Fatalf("Readv sequence got %d bytes, want %d", total, wantBytes) - } - - if want := append(data, extra[0]); !bytes.Equal(readBuffer, want) { - t.Errorf("Readv sequence got %v, want %v", readBuffer, want) - } -} - -// Same as TestReadsAccumulate. -func TestPipeWritesAccumulate(t *testing.T) { - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - wfile := fd.New(fds[1]) - - // Eventually close the read end, it doesn't depend on a pipe object. - defer syscall.Close(fds[0]) - - // Get a new write only pipe reference. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Write: true}, wfile, nil) - if err != nil { - wfile.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - // Don't forget to remove the fd from the fd notifier. Otherwise other tests - // will likely be borked, because it's global :( - defer p.Release() - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - pipeSize, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(wfile.FD()), syscall.F_GETPIPE_SZ, 0) - if errno != 0 { - t.Fatalf("fcntl(F_GETPIPE_SZ) failed: %v", errno) - } - t.Logf("Pipe buffer size: %d", pipeSize) - - // Construct a segment vec that is larger than the pipe size to trigger an - // EWOULDBLOCK. - wantBytes := int(pipeSize) * 2 - writeBuffer := make([]byte, wantBytes) - for i := 0; i < wantBytes; i++ { - writeBuffer[i] = 'a' - } - iov := usermem.BytesIOSequence(writeBuffer) - n, err := p.Write(ctx, file, iov, 0) - if err != syserror.ErrWouldBlock { - t.Fatalf("Writev got error %v, want %v", err, syserror.ErrWouldBlock) - } - if n != int64(pipeSize) { - t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize) - } - total := n - iov = iov.DropFirst64(n) - - // Read the entire pipe buf size to make space for the second half. - readBuffer := make([]byte, n) - if n, err := syscall.Read(fds[0], readBuffer); n != len(readBuffer) || err != nil { - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(readBuffer)) - } - if !bytes.Equal(readBuffer, writeBuffer[:len(readBuffer)]) { - t.Fatalf("wrong data read from pipe, got: %v, want: %v", readBuffer, writeBuffer) - } - - // This time we should not block. - n, err = p.Write(ctx, file, iov, 0) - if err != nil { - t.Fatalf("Writev got error %v, want nil", err) - } - if n != int64(pipeSize) { - t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize) - } - total += n - - // Assert that the result we got back is cumulative. - if total != int64(wantBytes) { - t.Fatalf("Writev sequence got %d bytes, want %d", total, wantBytes) - } -} diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go deleted file mode 100644 index 2fb824d5c..000000000 --- a/pkg/sentry/fs/file_overlay_test.go +++ /dev/null @@ -1,275 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" -) - -func TestReaddir(t *testing.T) { - ctx := contexttest.Context(t) - ctx = &rootContext{ - Context: ctx, - root: fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root"), - } - for _, test := range []struct { - // Test description. - desc string - - // Lookup parameters. - dir *fs.Inode - - // Want from lookup. - err error - names []string - }{ - { - desc: "no upper, lower has entries", - dir: fs.NewTestOverlayDir(ctx, - nil, /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - {name: "b"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "b"}, - }, - { - desc: "upper has entries, no lower", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - {name: "b"}, - }, nil), /* upper */ - nil, /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "b"}, - }, - { - desc: "upper and lower, entries combine", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "b"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "b"}, - }, - { - desc: "upper and lower, entries combine, none are masked", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - }, []string{"b"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "c"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "c"}, - }, - { - desc: "upper and lower, entries combine, upper masks some of lower", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - }, []string{"b"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "b"}, /* will be masked */ - {name: "c"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "c"}, - }, - } { - t.Run(test.desc, func(t *testing.T) { - openDir, err := test.dir.GetFile(ctx, fs.NewDirent(ctx, test.dir, "stub"), fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("GetFile got error %v, want nil", err) - } - stubSerializer := &fs.CollectEntriesSerializer{} - err = openDir.Readdir(ctx, stubSerializer) - if err != test.err { - t.Fatalf("Readdir got error %v, want nil", err) - } - if err != nil { - return - } - if !reflect.DeepEqual(stubSerializer.Order, test.names) { - t.Errorf("Readdir got names %v, want %v", stubSerializer.Order, test.names) - } - }) - } -} - -func TestReaddirRevalidation(t *testing.T) { - ctx := contexttest.Context(t) - ctx = &rootContext{ - Context: ctx, - root: fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root"), - } - - // Create an overlay with two directories, each with one file. - upper := newTestRamfsDir(ctx, []dirContent{{name: "a"}}, nil) - lower := newTestRamfsDir(ctx, []dirContent{{name: "b"}}, nil) - overlay := fs.NewTestOverlayDir(ctx, upper, lower, true /* revalidate */) - - // Get a handle to the dirent in the upper filesystem so that we can - // modify it without going through the dirent. - upperDir := upper.InodeOperations.(*dir).InodeOperations.(*ramfs.Dir) - - // Check that overlay returns the files from both upper and lower. - openDir, err := overlay.GetFile(ctx, fs.NewDirent(ctx, overlay, "stub"), fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("GetFile got error %v, want nil", err) - } - ser := &fs.CollectEntriesSerializer{} - if err := openDir.Readdir(ctx, ser); err != nil { - t.Fatalf("Readdir got error %v, want nil", err) - } - got, want := ser.Order, []string{".", "..", "a", "b"} - if !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } - - // Remove "a" from the upper and add "c". - if err := upperDir.Remove(ctx, upper, "a"); err != nil { - t.Fatalf("error removing child: %v", err) - } - upperDir.AddChild(ctx, "c", fs.NewInode(ctx, fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermissions{}, 0), - upper.MountSource, fs.StableAttr{Type: fs.RegularFile})) - - // Seek to beginning of the directory and do the readdir again. - if _, err := openDir.Seek(ctx, fs.SeekSet, 0); err != nil { - t.Fatalf("error seeking to beginning of dir: %v", err) - } - ser = &fs.CollectEntriesSerializer{} - if err := openDir.Readdir(ctx, ser); err != nil { - t.Fatalf("Readdir got error %v, want nil", err) - } - - // Readdir should return the updated children. - got, want = ser.Order, []string{".", "..", "b", "c"} - if !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } -} - -// TestReaddirOverlayFrozen tests that calling Readdir on an overlay file with -// a frozen dirent tree does not make Readdir calls to the underlying files. -func TestReaddirOverlayFrozen(t *testing.T) { - ctx := contexttest.Context(t) - - // Create an overlay with two directories, each with two files. - upper := newTestRamfsDir(ctx, []dirContent{{name: "upper-file1"}, {name: "upper-file2"}}, nil) - lower := newTestRamfsDir(ctx, []dirContent{{name: "lower-file1"}, {name: "lower-file2"}}, nil) - overlayInode := fs.NewTestOverlayDir(ctx, upper, lower, false) - - // Set that overlay as the root. - root := fs.NewDirent(ctx, overlayInode, "root") - ctx = &rootContext{ - Context: ctx, - root: root, - } - - // Check that calling Readdir on the root now returns all 4 files (2 - // from each layer in the overlay). - rootFile, err := root.Inode.GetFile(ctx, root, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("root.Inode.GetFile failed: %v", err) - } - defer rootFile.DecRef() - ser := &fs.CollectEntriesSerializer{} - if err := rootFile.Readdir(ctx, ser); err != nil { - t.Fatalf("rootFile.Readdir failed: %v", err) - } - if got, want := ser.Order, []string{".", "..", "lower-file1", "lower-file2", "upper-file1", "upper-file2"}; !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } - - // Readdir should have been called on upper and lower. - upperDir := upper.InodeOperations.(*dir) - lowerDir := lower.InodeOperations.(*dir) - if !upperDir.ReaddirCalled { - t.Errorf("upperDir.ReaddirCalled got %v, want true", upperDir.ReaddirCalled) - } - if !lowerDir.ReaddirCalled { - t.Errorf("lowerDir.ReaddirCalled got %v, want true", lowerDir.ReaddirCalled) - } - - // Reset. - upperDir.ReaddirCalled = false - lowerDir.ReaddirCalled = false - - // Take references on "upper-file1" and "lower-file1", pinning them in - // the dirent tree. - for _, name := range []string{"upper-file1", "lower-file1"} { - if _, err := root.Walk(ctx, root, name); err != nil { - t.Fatalf("root.Walk(%q) failed: %v", name, err) - } - // Don't drop a reference on the returned dirent so that it - // will stay in the tree. - } - - // Freeze the dirent tree. - root.Freeze() - - // Seek back to the beginning of the file. - if _, err := rootFile.Seek(ctx, fs.SeekSet, 0); err != nil { - t.Fatalf("error seeking to beginning of directory: %v", err) - } - - // Calling Readdir on the root now will return only the pinned - // children. - ser = &fs.CollectEntriesSerializer{} - if err := rootFile.Readdir(ctx, ser); err != nil { - t.Fatalf("rootFile.Readdir failed: %v", err) - } - if got, want := ser.Order, []string{".", "..", "lower-file1", "upper-file1"}; !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } - - // Readdir should NOT have been called on upper or lower. - if upperDir.ReaddirCalled { - t.Errorf("upperDir.ReaddirCalled got %v, want false", upperDir.ReaddirCalled) - } - if lowerDir.ReaddirCalled { - t.Errorf("lowerDir.ReaddirCalled got %v, want false", lowerDir.ReaddirCalled) - } -} - -type rootContext struct { - context.Context - root *fs.Dirent -} - -// Value implements context.Context. -func (r *rootContext) Value(key interface{}) interface{} { - switch key { - case fs.CtxRoot: - r.root.IncRef() - return r.root - default: - return r.Context.Value(key) - } -} diff --git a/pkg/sentry/fs/filetest/BUILD b/pkg/sentry/fs/filetest/BUILD deleted file mode 100644 index a9d6d9301..000000000 --- a/pkg/sentry/fs/filetest/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "filetest", - testonly = 1, - srcs = ["filetest.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/filetest", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/filetest/filetest.go b/pkg/sentry/fs/filetest/filetest.go deleted file mode 100644 index 22270a494..000000000 --- a/pkg/sentry/fs/filetest/filetest.go +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package filetest provides a test implementation of an fs.File. -package filetest - -import ( - "fmt" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/anon" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -// TestFileOperations is an implementation of the File interface. It provides all -// required methods. -type TestFileOperations struct { - fsutil.FileNoopRelease `state:"nosave"` - fsutil.FilePipeSeek `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileNoFsync `state:"nosave"` - fsutil.FileNoopFlush `state:"nosave"` - fsutil.FileNoMMap `state:"nosave"` - fsutil.FileNoIoctl `state:"nosave"` - fsutil.FileNoSplice `state:"nosave"` - fsutil.FileUseInodeUnstableAttr `state:"nosave"` - waiter.AlwaysReady `state:"nosave"` -} - -// NewTestFile creates and initializes a new test file. -func NewTestFile(tb testing.TB) *fs.File { - ctx := contexttest.Context(tb) - dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "test") - return fs.NewFile(ctx, dirent, fs.FileFlags{}, &TestFileOperations{}) -} - -// Read just fails the request. -func (*TestFileOperations) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, fmt.Errorf("Readv not implemented") -} - -// Write just fails the request. -func (*TestFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, fmt.Errorf("Writev not implemented") -} diff --git a/pkg/sentry/fs/fs_state_autogen.go b/pkg/sentry/fs/fs_state_autogen.go new file mode 100755 index 000000000..5ea2669e6 --- /dev/null +++ b/pkg/sentry/fs/fs_state_autogen.go @@ -0,0 +1,632 @@ +// automatically generated by stateify. + +package fs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *StableAttr) beforeSave() {} +func (x *StableAttr) save(m state.Map) { + x.beforeSave() + m.Save("Type", &x.Type) + m.Save("DeviceID", &x.DeviceID) + m.Save("InodeID", &x.InodeID) + m.Save("BlockSize", &x.BlockSize) + m.Save("DeviceFileMajor", &x.DeviceFileMajor) + m.Save("DeviceFileMinor", &x.DeviceFileMinor) +} + +func (x *StableAttr) afterLoad() {} +func (x *StableAttr) load(m state.Map) { + m.Load("Type", &x.Type) + m.Load("DeviceID", &x.DeviceID) + m.Load("InodeID", &x.InodeID) + m.Load("BlockSize", &x.BlockSize) + m.Load("DeviceFileMajor", &x.DeviceFileMajor) + m.Load("DeviceFileMinor", &x.DeviceFileMinor) +} + +func (x *UnstableAttr) beforeSave() {} +func (x *UnstableAttr) save(m state.Map) { + x.beforeSave() + m.Save("Size", &x.Size) + m.Save("Usage", &x.Usage) + m.Save("Perms", &x.Perms) + m.Save("Owner", &x.Owner) + m.Save("AccessTime", &x.AccessTime) + m.Save("ModificationTime", &x.ModificationTime) + m.Save("StatusChangeTime", &x.StatusChangeTime) + m.Save("Links", &x.Links) +} + +func (x *UnstableAttr) afterLoad() {} +func (x *UnstableAttr) load(m state.Map) { + m.Load("Size", &x.Size) + m.Load("Usage", &x.Usage) + m.Load("Perms", &x.Perms) + m.Load("Owner", &x.Owner) + m.Load("AccessTime", &x.AccessTime) + m.Load("ModificationTime", &x.ModificationTime) + m.Load("StatusChangeTime", &x.StatusChangeTime) + m.Load("Links", &x.Links) +} + +func (x *AttrMask) beforeSave() {} +func (x *AttrMask) save(m state.Map) { + x.beforeSave() + m.Save("Type", &x.Type) + m.Save("DeviceID", &x.DeviceID) + m.Save("InodeID", &x.InodeID) + m.Save("BlockSize", &x.BlockSize) + m.Save("Size", &x.Size) + m.Save("Usage", &x.Usage) + m.Save("Perms", &x.Perms) + m.Save("UID", &x.UID) + m.Save("GID", &x.GID) + m.Save("AccessTime", &x.AccessTime) + m.Save("ModificationTime", &x.ModificationTime) + m.Save("StatusChangeTime", &x.StatusChangeTime) + m.Save("Links", &x.Links) +} + +func (x *AttrMask) afterLoad() {} +func (x *AttrMask) load(m state.Map) { + m.Load("Type", &x.Type) + m.Load("DeviceID", &x.DeviceID) + m.Load("InodeID", &x.InodeID) + m.Load("BlockSize", &x.BlockSize) + m.Load("Size", &x.Size) + m.Load("Usage", &x.Usage) + m.Load("Perms", &x.Perms) + m.Load("UID", &x.UID) + m.Load("GID", &x.GID) + m.Load("AccessTime", &x.AccessTime) + m.Load("ModificationTime", &x.ModificationTime) + m.Load("StatusChangeTime", &x.StatusChangeTime) + m.Load("Links", &x.Links) +} + +func (x *PermMask) beforeSave() {} +func (x *PermMask) save(m state.Map) { + x.beforeSave() + m.Save("Read", &x.Read) + m.Save("Write", &x.Write) + m.Save("Execute", &x.Execute) +} + +func (x *PermMask) afterLoad() {} +func (x *PermMask) load(m state.Map) { + m.Load("Read", &x.Read) + m.Load("Write", &x.Write) + m.Load("Execute", &x.Execute) +} + +func (x *FilePermissions) beforeSave() {} +func (x *FilePermissions) save(m state.Map) { + x.beforeSave() + m.Save("User", &x.User) + m.Save("Group", &x.Group) + m.Save("Other", &x.Other) + m.Save("Sticky", &x.Sticky) + m.Save("SetUID", &x.SetUID) + m.Save("SetGID", &x.SetGID) +} + +func (x *FilePermissions) afterLoad() {} +func (x *FilePermissions) load(m state.Map) { + m.Load("User", &x.User) + m.Load("Group", &x.Group) + m.Load("Other", &x.Other) + m.Load("Sticky", &x.Sticky) + m.Load("SetUID", &x.SetUID) + m.Load("SetGID", &x.SetGID) +} + +func (x *FileOwner) beforeSave() {} +func (x *FileOwner) save(m state.Map) { + x.beforeSave() + m.Save("UID", &x.UID) + m.Save("GID", &x.GID) +} + +func (x *FileOwner) afterLoad() {} +func (x *FileOwner) load(m state.Map) { + m.Load("UID", &x.UID) + m.Load("GID", &x.GID) +} + +func (x *DentAttr) beforeSave() {} +func (x *DentAttr) save(m state.Map) { + x.beforeSave() + m.Save("Type", &x.Type) + m.Save("InodeID", &x.InodeID) +} + +func (x *DentAttr) afterLoad() {} +func (x *DentAttr) load(m state.Map) { + m.Load("Type", &x.Type) + m.Load("InodeID", &x.InodeID) +} + +func (x *SortedDentryMap) beforeSave() {} +func (x *SortedDentryMap) save(m state.Map) { + x.beforeSave() + m.Save("names", &x.names) + m.Save("entries", &x.entries) +} + +func (x *SortedDentryMap) afterLoad() {} +func (x *SortedDentryMap) load(m state.Map) { + m.Load("names", &x.names) + m.Load("entries", &x.entries) +} + +func (x *Dirent) save(m state.Map) { + x.beforeSave() + var children map[string]*Dirent = x.saveChildren() + m.SaveValue("children", children) + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("userVisible", &x.userVisible) + m.Save("Inode", &x.Inode) + m.Save("name", &x.name) + m.Save("parent", &x.parent) + m.Save("deleted", &x.deleted) + m.Save("frozen", &x.frozen) + m.Save("mounted", &x.mounted) +} + +func (x *Dirent) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("userVisible", &x.userVisible) + m.Load("Inode", &x.Inode) + m.Load("name", &x.name) + m.Load("parent", &x.parent) + m.Load("deleted", &x.deleted) + m.Load("frozen", &x.frozen) + m.Load("mounted", &x.mounted) + m.LoadValue("children", new(map[string]*Dirent), func(y interface{}) { x.loadChildren(y.(map[string]*Dirent)) }) + m.AfterLoad(x.afterLoad) +} + +func (x *DirentCache) beforeSave() {} +func (x *DirentCache) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.currentSize) { m.Failf("currentSize is %v, expected zero", x.currentSize) } + if !state.IsZeroValue(x.list) { m.Failf("list is %v, expected zero", x.list) } + m.Save("maxSize", &x.maxSize) + m.Save("limit", &x.limit) +} + +func (x *DirentCache) afterLoad() {} +func (x *DirentCache) load(m state.Map) { + m.Load("maxSize", &x.maxSize) + m.Load("limit", &x.limit) +} + +func (x *DirentCacheLimiter) beforeSave() {} +func (x *DirentCacheLimiter) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.count) { m.Failf("count is %v, expected zero", x.count) } + m.Save("max", &x.max) +} + +func (x *DirentCacheLimiter) afterLoad() {} +func (x *DirentCacheLimiter) load(m state.Map) { + m.Load("max", &x.max) +} + +func (x *direntList) beforeSave() {} +func (x *direntList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *direntList) afterLoad() {} +func (x *direntList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *direntEntry) beforeSave() {} +func (x *direntEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *direntEntry) afterLoad() {} +func (x *direntEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func (x *eventList) beforeSave() {} +func (x *eventList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *eventList) afterLoad() {} +func (x *eventList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *eventEntry) beforeSave() {} +func (x *eventEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *eventEntry) afterLoad() {} +func (x *eventEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func (x *File) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("UniqueID", &x.UniqueID) + m.Save("Dirent", &x.Dirent) + m.Save("flags", &x.flags) + m.Save("async", &x.async) + m.Save("FileOperations", &x.FileOperations) + m.Save("offset", &x.offset) +} + +func (x *File) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("UniqueID", &x.UniqueID) + m.Load("Dirent", &x.Dirent) + m.Load("flags", &x.flags) + m.Load("async", &x.async) + m.LoadWait("FileOperations", &x.FileOperations) + m.Load("offset", &x.offset) + m.AfterLoad(x.afterLoad) +} + +func (x *overlayFileOperations) beforeSave() {} +func (x *overlayFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("upper", &x.upper) + m.Save("lower", &x.lower) + m.Save("dirCursor", &x.dirCursor) +} + +func (x *overlayFileOperations) afterLoad() {} +func (x *overlayFileOperations) load(m state.Map) { + m.Load("upper", &x.upper) + m.Load("lower", &x.lower) + m.Load("dirCursor", &x.dirCursor) +} + +func (x *overlayMappingIdentity) beforeSave() {} +func (x *overlayMappingIdentity) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("id", &x.id) + m.Save("overlayFile", &x.overlayFile) +} + +func (x *overlayMappingIdentity) afterLoad() {} +func (x *overlayMappingIdentity) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("id", &x.id) + m.Load("overlayFile", &x.overlayFile) +} + +func (x *MountSourceFlags) beforeSave() {} +func (x *MountSourceFlags) save(m state.Map) { + x.beforeSave() + m.Save("ReadOnly", &x.ReadOnly) + m.Save("NoAtime", &x.NoAtime) + m.Save("ForcePageCache", &x.ForcePageCache) + m.Save("NoExec", &x.NoExec) +} + +func (x *MountSourceFlags) afterLoad() {} +func (x *MountSourceFlags) load(m state.Map) { + m.Load("ReadOnly", &x.ReadOnly) + m.Load("NoAtime", &x.NoAtime) + m.Load("ForcePageCache", &x.ForcePageCache) + m.Load("NoExec", &x.NoExec) +} + +func (x *FileFlags) beforeSave() {} +func (x *FileFlags) save(m state.Map) { + x.beforeSave() + m.Save("Direct", &x.Direct) + m.Save("NonBlocking", &x.NonBlocking) + m.Save("DSync", &x.DSync) + m.Save("Sync", &x.Sync) + m.Save("Append", &x.Append) + m.Save("Read", &x.Read) + m.Save("Write", &x.Write) + m.Save("Pread", &x.Pread) + m.Save("Pwrite", &x.Pwrite) + m.Save("Directory", &x.Directory) + m.Save("Async", &x.Async) + m.Save("LargeFile", &x.LargeFile) + m.Save("NonSeekable", &x.NonSeekable) +} + +func (x *FileFlags) afterLoad() {} +func (x *FileFlags) load(m state.Map) { + m.Load("Direct", &x.Direct) + m.Load("NonBlocking", &x.NonBlocking) + m.Load("DSync", &x.DSync) + m.Load("Sync", &x.Sync) + m.Load("Append", &x.Append) + m.Load("Read", &x.Read) + m.Load("Write", &x.Write) + m.Load("Pread", &x.Pread) + m.Load("Pwrite", &x.Pwrite) + m.Load("Directory", &x.Directory) + m.Load("Async", &x.Async) + m.Load("LargeFile", &x.LargeFile) + m.Load("NonSeekable", &x.NonSeekable) +} + +func (x *Inode) beforeSave() {} +func (x *Inode) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("InodeOperations", &x.InodeOperations) + m.Save("StableAttr", &x.StableAttr) + m.Save("LockCtx", &x.LockCtx) + m.Save("Watches", &x.Watches) + m.Save("MountSource", &x.MountSource) + m.Save("overlay", &x.overlay) +} + +func (x *Inode) afterLoad() {} +func (x *Inode) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("InodeOperations", &x.InodeOperations) + m.Load("StableAttr", &x.StableAttr) + m.Load("LockCtx", &x.LockCtx) + m.Load("Watches", &x.Watches) + m.Load("MountSource", &x.MountSource) + m.Load("overlay", &x.overlay) +} + +func (x *LockCtx) beforeSave() {} +func (x *LockCtx) save(m state.Map) { + x.beforeSave() + m.Save("Posix", &x.Posix) + m.Save("BSD", &x.BSD) +} + +func (x *LockCtx) afterLoad() {} +func (x *LockCtx) load(m state.Map) { + m.Load("Posix", &x.Posix) + m.Load("BSD", &x.BSD) +} + +func (x *Watches) beforeSave() {} +func (x *Watches) save(m state.Map) { + x.beforeSave() + m.Save("ws", &x.ws) + m.Save("unlinked", &x.unlinked) +} + +func (x *Watches) afterLoad() {} +func (x *Watches) load(m state.Map) { + m.Load("ws", &x.ws) + m.Load("unlinked", &x.unlinked) +} + +func (x *Inotify) beforeSave() {} +func (x *Inotify) save(m state.Map) { + x.beforeSave() + m.Save("id", &x.id) + m.Save("events", &x.events) + m.Save("scratch", &x.scratch) + m.Save("nextWatch", &x.nextWatch) + m.Save("watches", &x.watches) +} + +func (x *Inotify) afterLoad() {} +func (x *Inotify) load(m state.Map) { + m.Load("id", &x.id) + m.Load("events", &x.events) + m.Load("scratch", &x.scratch) + m.Load("nextWatch", &x.nextWatch) + m.Load("watches", &x.watches) +} + +func (x *Event) beforeSave() {} +func (x *Event) save(m state.Map) { + x.beforeSave() + m.Save("eventEntry", &x.eventEntry) + m.Save("wd", &x.wd) + m.Save("mask", &x.mask) + m.Save("cookie", &x.cookie) + m.Save("len", &x.len) + m.Save("name", &x.name) +} + +func (x *Event) afterLoad() {} +func (x *Event) load(m state.Map) { + m.Load("eventEntry", &x.eventEntry) + m.Load("wd", &x.wd) + m.Load("mask", &x.mask) + m.Load("cookie", &x.cookie) + m.Load("len", &x.len) + m.Load("name", &x.name) +} + +func (x *Watch) beforeSave() {} +func (x *Watch) save(m state.Map) { + x.beforeSave() + m.Save("owner", &x.owner) + m.Save("wd", &x.wd) + m.Save("target", &x.target) + m.Save("unpinned", &x.unpinned) + m.Save("mask", &x.mask) + m.Save("pins", &x.pins) +} + +func (x *Watch) afterLoad() {} +func (x *Watch) load(m state.Map) { + m.Load("owner", &x.owner) + m.Load("wd", &x.wd) + m.Load("target", &x.target) + m.Load("unpinned", &x.unpinned) + m.Load("mask", &x.mask) + m.Load("pins", &x.pins) +} + +func (x *MountSource) beforeSave() {} +func (x *MountSource) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("MountSourceOperations", &x.MountSourceOperations) + m.Save("FilesystemType", &x.FilesystemType) + m.Save("Flags", &x.Flags) + m.Save("fscache", &x.fscache) + m.Save("direntRefs", &x.direntRefs) +} + +func (x *MountSource) afterLoad() {} +func (x *MountSource) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("MountSourceOperations", &x.MountSourceOperations) + m.Load("FilesystemType", &x.FilesystemType) + m.Load("Flags", &x.Flags) + m.Load("fscache", &x.fscache) + m.Load("direntRefs", &x.direntRefs) +} + +func (x *SimpleMountSourceOperations) beforeSave() {} +func (x *SimpleMountSourceOperations) save(m state.Map) { + x.beforeSave() + m.Save("keep", &x.keep) + m.Save("revalidate", &x.revalidate) + m.Save("cacheReaddir", &x.cacheReaddir) +} + +func (x *SimpleMountSourceOperations) afterLoad() {} +func (x *SimpleMountSourceOperations) load(m state.Map) { + m.Load("keep", &x.keep) + m.Load("revalidate", &x.revalidate) + m.Load("cacheReaddir", &x.cacheReaddir) +} + +func (x *overlayMountSourceOperations) beforeSave() {} +func (x *overlayMountSourceOperations) save(m state.Map) { + x.beforeSave() + m.Save("upper", &x.upper) + m.Save("lower", &x.lower) +} + +func (x *overlayMountSourceOperations) afterLoad() {} +func (x *overlayMountSourceOperations) load(m state.Map) { + m.Load("upper", &x.upper) + m.Load("lower", &x.lower) +} + +func (x *overlayFilesystem) beforeSave() {} +func (x *overlayFilesystem) save(m state.Map) { + x.beforeSave() +} + +func (x *overlayFilesystem) afterLoad() {} +func (x *overlayFilesystem) load(m state.Map) { +} + +func (x *Mount) beforeSave() {} +func (x *Mount) save(m state.Map) { + x.beforeSave() + m.Save("ID", &x.ID) + m.Save("ParentID", &x.ParentID) + m.Save("root", &x.root) + m.Save("previous", &x.previous) +} + +func (x *Mount) afterLoad() {} +func (x *Mount) load(m state.Map) { + m.Load("ID", &x.ID) + m.Load("ParentID", &x.ParentID) + m.Load("root", &x.root) + m.Load("previous", &x.previous) +} + +func (x *MountNamespace) beforeSave() {} +func (x *MountNamespace) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("userns", &x.userns) + m.Save("root", &x.root) + m.Save("mounts", &x.mounts) + m.Save("mountID", &x.mountID) +} + +func (x *MountNamespace) afterLoad() {} +func (x *MountNamespace) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("userns", &x.userns) + m.Load("root", &x.root) + m.Load("mounts", &x.mounts) + m.Load("mountID", &x.mountID) +} + +func (x *overlayEntry) beforeSave() {} +func (x *overlayEntry) save(m state.Map) { + x.beforeSave() + m.Save("lowerExists", &x.lowerExists) + m.Save("lower", &x.lower) + m.Save("mappings", &x.mappings) + m.Save("upper", &x.upper) + m.Save("dirCache", &x.dirCache) +} + +func (x *overlayEntry) afterLoad() {} +func (x *overlayEntry) load(m state.Map) { + m.Load("lowerExists", &x.lowerExists) + m.Load("lower", &x.lower) + m.Load("mappings", &x.mappings) + m.Load("upper", &x.upper) + m.Load("dirCache", &x.dirCache) +} + +func init() { + state.Register("fs.StableAttr", (*StableAttr)(nil), state.Fns{Save: (*StableAttr).save, Load: (*StableAttr).load}) + state.Register("fs.UnstableAttr", (*UnstableAttr)(nil), state.Fns{Save: (*UnstableAttr).save, Load: (*UnstableAttr).load}) + state.Register("fs.AttrMask", (*AttrMask)(nil), state.Fns{Save: (*AttrMask).save, Load: (*AttrMask).load}) + state.Register("fs.PermMask", (*PermMask)(nil), state.Fns{Save: (*PermMask).save, Load: (*PermMask).load}) + state.Register("fs.FilePermissions", (*FilePermissions)(nil), state.Fns{Save: (*FilePermissions).save, Load: (*FilePermissions).load}) + state.Register("fs.FileOwner", (*FileOwner)(nil), state.Fns{Save: (*FileOwner).save, Load: (*FileOwner).load}) + state.Register("fs.DentAttr", (*DentAttr)(nil), state.Fns{Save: (*DentAttr).save, Load: (*DentAttr).load}) + state.Register("fs.SortedDentryMap", (*SortedDentryMap)(nil), state.Fns{Save: (*SortedDentryMap).save, Load: (*SortedDentryMap).load}) + state.Register("fs.Dirent", (*Dirent)(nil), state.Fns{Save: (*Dirent).save, Load: (*Dirent).load}) + state.Register("fs.DirentCache", (*DirentCache)(nil), state.Fns{Save: (*DirentCache).save, Load: (*DirentCache).load}) + state.Register("fs.DirentCacheLimiter", (*DirentCacheLimiter)(nil), state.Fns{Save: (*DirentCacheLimiter).save, Load: (*DirentCacheLimiter).load}) + state.Register("fs.direntList", (*direntList)(nil), state.Fns{Save: (*direntList).save, Load: (*direntList).load}) + state.Register("fs.direntEntry", (*direntEntry)(nil), state.Fns{Save: (*direntEntry).save, Load: (*direntEntry).load}) + state.Register("fs.eventList", (*eventList)(nil), state.Fns{Save: (*eventList).save, Load: (*eventList).load}) + state.Register("fs.eventEntry", (*eventEntry)(nil), state.Fns{Save: (*eventEntry).save, Load: (*eventEntry).load}) + state.Register("fs.File", (*File)(nil), state.Fns{Save: (*File).save, Load: (*File).load}) + state.Register("fs.overlayFileOperations", (*overlayFileOperations)(nil), state.Fns{Save: (*overlayFileOperations).save, Load: (*overlayFileOperations).load}) + state.Register("fs.overlayMappingIdentity", (*overlayMappingIdentity)(nil), state.Fns{Save: (*overlayMappingIdentity).save, Load: (*overlayMappingIdentity).load}) + state.Register("fs.MountSourceFlags", (*MountSourceFlags)(nil), state.Fns{Save: (*MountSourceFlags).save, Load: (*MountSourceFlags).load}) + state.Register("fs.FileFlags", (*FileFlags)(nil), state.Fns{Save: (*FileFlags).save, Load: (*FileFlags).load}) + state.Register("fs.Inode", (*Inode)(nil), state.Fns{Save: (*Inode).save, Load: (*Inode).load}) + state.Register("fs.LockCtx", (*LockCtx)(nil), state.Fns{Save: (*LockCtx).save, Load: (*LockCtx).load}) + state.Register("fs.Watches", (*Watches)(nil), state.Fns{Save: (*Watches).save, Load: (*Watches).load}) + state.Register("fs.Inotify", (*Inotify)(nil), state.Fns{Save: (*Inotify).save, Load: (*Inotify).load}) + state.Register("fs.Event", (*Event)(nil), state.Fns{Save: (*Event).save, Load: (*Event).load}) + state.Register("fs.Watch", (*Watch)(nil), state.Fns{Save: (*Watch).save, Load: (*Watch).load}) + state.Register("fs.MountSource", (*MountSource)(nil), state.Fns{Save: (*MountSource).save, Load: (*MountSource).load}) + state.Register("fs.SimpleMountSourceOperations", (*SimpleMountSourceOperations)(nil), state.Fns{Save: (*SimpleMountSourceOperations).save, Load: (*SimpleMountSourceOperations).load}) + state.Register("fs.overlayMountSourceOperations", (*overlayMountSourceOperations)(nil), state.Fns{Save: (*overlayMountSourceOperations).save, Load: (*overlayMountSourceOperations).load}) + state.Register("fs.overlayFilesystem", (*overlayFilesystem)(nil), state.Fns{Save: (*overlayFilesystem).save, Load: (*overlayFilesystem).load}) + state.Register("fs.Mount", (*Mount)(nil), state.Fns{Save: (*Mount).save, Load: (*Mount).load}) + state.Register("fs.MountNamespace", (*MountNamespace)(nil), state.Fns{Save: (*MountNamespace).save, Load: (*MountNamespace).load}) + state.Register("fs.overlayEntry", (*overlayEntry)(nil), state.Fns{Save: (*overlayEntry).save, Load: (*overlayEntry).load}) +} diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD deleted file mode 100644 index 6499f87ac..000000000 --- a/pkg/sentry/fs/fsutil/BUILD +++ /dev/null @@ -1,118 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "dirty_set_impl", - out = "dirty_set_impl.go", - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", - }, - package = "fsutil", - prefix = "Dirty", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.MappableRange", - "Value": "DirtyInfo", - "Functions": "dirtySetFunctions", - }, -) - -go_template_instance( - name = "frame_ref_set_impl", - out = "frame_ref_set_impl.go", - imports = { - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", - }, - package = "fsutil", - prefix = "frameRef", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "platform.FileRange", - "Value": "uint64", - "Functions": "frameRefSetFunctions", - }, -) - -go_template_instance( - name = "file_range_set_impl", - out = "file_range_set_impl.go", - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", - }, - package = "fsutil", - prefix = "FileRange", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.MappableRange", - "Value": "uint64", - "Functions": "fileRangeSetFunctions", - }, -) - -go_library( - name = "fsutil", - srcs = [ - "dirty_set.go", - "dirty_set_impl.go", - "file.go", - "file_range_set.go", - "file_range_set_impl.go", - "frame_ref_set.go", - "frame_ref_set_impl.go", - "fsutil.go", - "host_file_mapper.go", - "host_file_mapper_state.go", - "host_file_mapper_unsafe.go", - "host_mappable.go", - "inode.go", - "inode_cached.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/fsutil", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/log", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/safemem", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/state", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "fsutil_test", - size = "small", - srcs = [ - "dirty_set_test.go", - "inode_cached_test.go", - ], - embed = [":fsutil"], - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/safemem", - "//pkg/sentry/usermem", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/fs/fsutil/README.md b/pkg/sentry/fs/fsutil/README.md deleted file mode 100644 index 8be367334..000000000 --- a/pkg/sentry/fs/fsutil/README.md +++ /dev/null @@ -1,207 +0,0 @@ -This package provides utilities for implementing virtual filesystem objects. - -[TOC] - -## Page cache - -`CachingInodeOperations` implements a page cache for files that cannot use the -host page cache. Normally these are files that store their data in a remote -filesystem. This also applies to files that are accessed on a platform that does -not support directly memory mapping host file descriptors (e.g. the ptrace -platform). - -An `CachingInodeOperations` buffers regions of a single file into memory. It is -owned by an `fs.Inode`, the in-memory representation of a file (all open file -descriptors are backed by an `fs.Inode`). The `fs.Inode` provides operations for -reading memory into an `CachingInodeOperations`, to represent the contents of -the file in-memory, and for writing memory out, to relieve memory pressure on -the kernel and to synchronize in-memory changes to filesystems. - -An `CachingInodeOperations` enables readable and/or writable memory access to -file content. Files can be mapped shared or private, see mmap(2). When a file is -mapped shared, changes to the file via write(2) and truncate(2) are reflected in -the shared memory region. Conversely, when the shared memory region is modified, -changes to the file are visible via read(2). Multiple shared mappings of the -same file are coherent with each other. This is consistent with Linux. - -When a file is mapped private, updates to the mapped memory are not visible to -other memory mappings. Updates to the mapped memory are also not reflected in -the file content as seen by read(2). If the file is changed after a private -mapping is created, for instance by write(2), the change to the file may or may -not be reflected in the private mapping. This is consistent with Linux. - -An `CachingInodeOperations` keeps track of ranges of memory that were modified -(or "dirtied"). When the file is explicitly synced via fsync(2), only the dirty -ranges are written out to the filesystem. Any error returned indicates a failure -to write all dirty memory of an `CachingInodeOperations` to the filesystem. In -this case the filesystem may be in an inconsistent state. The same operation can -be performed on the shared memory itself using msync(2). If neither fsync(2) nor -msync(2) is performed, then the dirty memory is written out in accordance with -the `CachingInodeOperations` eviction strategy (see below) and there is no -guarantee that memory will be written out successfully in full. - -### Memory allocation and eviction - -An `CachingInodeOperations` implements the following allocation and eviction -strategy: - -- Memory is allocated and brought up to date with the contents of a file when - a region of mapped memory is accessed (or "faulted on"). - -- Dirty memory is written out to filesystems when an fsync(2) or msync(2) - operation is performed on a memory mapped file, for all memory mapped files - when saved, and/or when there are no longer any memory mappings of a range - of a file, see munmap(2). As the latter implies, in the absence of a panic - or SIGKILL, dirty memory is written out for all memory mapped files when an - application exits. - -- Memory is freed when there are no longer any memory mappings of a range of a - file (e.g. when an application exits). This behavior is consistent with - Linux for shared memory that has been locked via mlock(2). - -Notably, memory is not allocated for read(2) or write(2) operations. This means -that reads and writes to the file are only accelerated by an -`CachingInodeOperations` if the file being read or written has been memory -mapped *and* if the shared memory has been accessed at the region being read or -written. This diverges from Linux which buffers memory into a page cache on -read(2) proactively (i.e. readahead) and delays writing it out to filesystems on -write(2) (i.e. writeback). The absence of these optimizations is not visible to -applications beyond less than optimal performance when repeatedly reading and/or -writing to same region of a file. See [Future Work](#future-work) for plans to -implement these optimizations. - -Additionally, memory held by `CachingInodeOperationss` is currently unbounded in -size. An `CachingInodeOperations` does not write out dirty memory and free it -under system memory pressure. This can cause pathological memory usage. - -When memory is written back, an `CachingInodeOperations` may write regions of -shared memory that were never modified. This is due to the strategy of -minimizing page faults (see below) and handling only a subset of memory write -faults. In the absence of an application or sentry crash, it is guaranteed that -if a region of shared memory was written to, it is written back to a filesystem. - -### Life of a shared memory mapping - -A file is memory mapped via mmap(2). For example, if `A` is an address, an -application may execute: - -``` -mmap(A, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); -``` - -This creates a shared mapping of fd that reflects 4k of the contents of fd -starting at offset 0, accessible at address `A`. This in turn creates a virtual -memory area region ("vma") which indicates that [`A`, `A`+0x1000) is now a valid -address range for this application to access. - -At this point, memory has not been allocated in the file's -`CachingInodeOperations`. It is also the case that the address range [`A`, -`A`+0x1000) has not been mapped on the host on behalf of the application. If the -application then tries to modify 8 bytes of the shared memory: - -``` -char buffer[] = "aaaaaaaa"; -memcpy(A, buffer, 8); -``` - -The host then sends a `SIGSEGV` to the sentry because the address range [`A`, -`A`+8) is not mapped on the host. The `SIGSEGV` indicates that the memory was -accessed writable. The sentry looks up the vma associated with [`A`, `A`+8), -finds the file that was mapped and its `CachingInodeOperations`. It then calls -`CachingInodeOperations.Translate` which allocates memory to back [`A`, `A`+8). -It may choose to allocate more memory (i.e. do "readahead") to minimize -subsequent faults. - -Memory that is allocated comes from a host tmpfs file (see -`pgalloc.MemoryFile`). The host tmpfs file memory is brought up to date with the -contents of the mapped file on its filesystem. The region of the host tmpfs file -that reflects the mapped file is then mapped into the host address space of the -application so that subsequent memory accesses do not repeatedly generate a -`SIGSEGV`. - -The range that was allocated, including any extra memory allocation to minimize -faults, is marked dirty due to the write fault. This overcounts dirty memory if -the extra memory allocated is never modified. - -To make the scenario more interesting, imagine that this application spawns -another process and maps the same file in the exact same way: - -``` -mmap(A, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); -``` - -Imagine that this process then tries to modify the file again but with only 4 -bytes: - -``` -char buffer[] = "bbbb"; -memcpy(A, buffer, 4); -``` - -Since the first process has already mapped and accessed the same region of the -file writable, `CachingInodeOperations.Translate` is called but returns the -memory that has already been allocated rather than allocating new memory. The -address range [`A`, `A`+0x1000) reflects the same cached view of the file as the -first process sees. For example, reading 8 bytes from the file from either -process via read(2) starting at offset 0 returns a consistent "bbbbaaaa". - -When this process no longer needs the shared memory, it may do: - -``` -munmap(A, 0x1000); -``` - -At this point, the modified memory cached by the `CachingInodeOperations` is not -written back to the file because it is still in use by the first process that -mapped it. When the first process also does: - -``` -munmap(A, 0x1000); -``` - -Then the last memory mapping of the file at the range [0, 0x1000) is gone. The -file's `CachingInodeOperations` then starts writing back memory marked dirty to -the file on its filesystem. Once writing completes, regardless of whether it was -successful, the `CachingInodeOperations` frees the memory cached at the range -[0, 0x1000). - -Subsequent read(2) or write(2) operations on the file go directly to the -filesystem since there no longer exists memory for it in its -`CachingInodeOperations`. - -## Future Work - -### Page cache - -The sentry does not yet implement the readahead and writeback optimizations for -read(2) and write(2) respectively. To do so, on read(2) and/or write(2) the -sentry must ensure that memory is allocated in a page cache to read or write -into. However, the sentry cannot boundlessly allocate memory. If it did, the -host would eventually OOM-kill the sentry+application process. This means that -the sentry must implement a page cache memory allocation strategy that is -bounded by a global user or container imposed limit. When this limit is -approached, the sentry must decide from which page cache memory should be freed -so that it can allocate more memory. If it makes a poor decision, the sentry may -end up freeing and re-allocating memory to back regions of files that are -frequently used, nullifying the optimization (and in some cases causing worse -performance due to the overhead of memory allocation and general management). -This is a form of "cache thrashing". - -In Linux, much research has been done to select and implement a lightweight but -optimal page cache eviction algorithm. Linux makes use of hardware page bits to -keep track of whether memory has been accessed. The sentry does not have direct -access to hardware. Implementing a similarly lightweight and optimal page cache -eviction algorithm will need to either introduce a kernel interface to obtain -these page bits or find a suitable alternative proxy for access events. - -In Linux, readahead happens by default but is not always ideal. For instance, -for files that are not read sequentially, it would be more ideal to simply read -from only those regions of the file rather than to optimistically cache some -number of bytes ahead of the read (up to 2MB in Linux) if the bytes cached won't -be accessed. Linux implements the fadvise64(2) system call for applications to -specify that a range of a file will not be accessed sequentially. The advice bit -FADV_RANDOM turns off the readahead optimization for the given range in the -given file. However fadvise64 is rarely used by applications so Linux implements -a readahead backoff strategy if reads are not sequential. To ensure that -application performance is not degraded, the sentry must implement a similar -backoff strategy. diff --git a/pkg/sentry/fs/fsutil/dirty_set_impl.go b/pkg/sentry/fs/fsutil/dirty_set_impl.go new file mode 100755 index 000000000..2510b81b3 --- /dev/null +++ b/pkg/sentry/fs/fsutil/dirty_set_impl.go @@ -0,0 +1,1274 @@ +package fsutil + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + DirtyminDegree = 3 + + DirtymaxDegree = 2 * DirtyminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type DirtySet struct { + root Dirtynode `state:".(*DirtySegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *DirtySet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *DirtySet) IsEmptyRange(r __generics_imported0.MappableRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *DirtySet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *DirtySet) SpanRange(r __generics_imported0.MappableRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *DirtySet) FirstSegment() DirtyIterator { + if s.root.nrSegments == 0 { + return DirtyIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *DirtySet) LastSegment() DirtyIterator { + if s.root.nrSegments == 0 { + return DirtyIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *DirtySet) FirstGap() DirtyGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return DirtyGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *DirtySet) LastGap() DirtyGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return DirtyGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *DirtySet) Find(key uint64) (DirtyIterator, DirtyGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return DirtyIterator{n, i}, DirtyGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return DirtyIterator{}, DirtyGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *DirtySet) FindSegment(key uint64) DirtyIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *DirtySet) LowerBoundSegment(min uint64) DirtyIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *DirtySet) UpperBoundSegment(max uint64) DirtyIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *DirtySet) FindGap(key uint64) DirtyGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *DirtySet) LowerBoundGap(min uint64) DirtyGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *DirtySet) UpperBoundGap(max uint64) DirtyGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *DirtySet) Add(r __generics_imported0.MappableRange, val DirtyInfo) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *DirtySet) AddWithoutMerging(r __generics_imported0.MappableRange, val DirtyInfo) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *DirtySet) Insert(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (dirtySetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (dirtySetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (dirtySetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *DirtySet) InsertWithoutMerging(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *DirtySet) InsertWithoutMergingUnchecked(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return DirtyIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *DirtySet) Remove(seg DirtyIterator) DirtyGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + dirtySetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(DirtyGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *DirtySet) RemoveAll() { + s.root = Dirtynode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *DirtySet) RemoveRange(r __generics_imported0.MappableRange) DirtyGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *DirtySet) Merge(first, second DirtyIterator) DirtyIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *DirtySet) MergeUnchecked(first, second DirtyIterator) DirtyIterator { + if first.End() == second.Start() { + if mval, ok := (dirtySetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return DirtyIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *DirtySet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *DirtySet) MergeRange(r __generics_imported0.MappableRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *DirtySet) MergeAdjacent(r __generics_imported0.MappableRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *DirtySet) Split(seg DirtyIterator, split uint64) (DirtyIterator, DirtyIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *DirtySet) SplitUnchecked(seg DirtyIterator, split uint64) (DirtyIterator, DirtyIterator) { + val1, val2 := (dirtySetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.MappableRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *DirtySet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *DirtySet) Isolate(seg DirtyIterator, r __generics_imported0.MappableRange) DirtyIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *DirtySet) ApplyContiguous(r __generics_imported0.MappableRange, fn func(seg DirtyIterator)) DirtyGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return DirtyGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return DirtyGapIterator{} + } + } +} + +// +stateify savable +type Dirtynode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *Dirtynode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [DirtymaxDegree - 1]__generics_imported0.MappableRange + values [DirtymaxDegree - 1]DirtyInfo + children [DirtymaxDegree]*Dirtynode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Dirtynode) firstSegment() DirtyIterator { + for n.hasChildren { + n = n.children[0] + } + return DirtyIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Dirtynode) lastSegment() DirtyIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return DirtyIterator{n, n.nrSegments - 1} +} + +func (n *Dirtynode) prevSibling() *Dirtynode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *Dirtynode) nextSibling() *Dirtynode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *Dirtynode) rebalanceBeforeInsert(gap DirtyGapIterator) DirtyGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < DirtymaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &Dirtynode{ + nrSegments: DirtyminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &Dirtynode{ + nrSegments: DirtyminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:DirtyminDegree-1], n.keys[:DirtyminDegree-1]) + copy(left.values[:DirtyminDegree-1], n.values[:DirtyminDegree-1]) + copy(right.keys[:DirtyminDegree-1], n.keys[DirtyminDegree:]) + copy(right.values[:DirtyminDegree-1], n.values[DirtyminDegree:]) + n.keys[0], n.values[0] = n.keys[DirtyminDegree-1], n.values[DirtyminDegree-1] + DirtyzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:DirtyminDegree], n.children[:DirtyminDegree]) + copy(right.children[:DirtyminDegree], n.children[DirtyminDegree:]) + DirtyzeroNodeSlice(n.children[2:]) + for i := 0; i < DirtyminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < DirtyminDegree { + return DirtyGapIterator{left, gap.index} + } + return DirtyGapIterator{right, gap.index - DirtyminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[DirtyminDegree-1], n.values[DirtyminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &Dirtynode{ + nrSegments: DirtyminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:DirtyminDegree-1], n.keys[DirtyminDegree:]) + copy(sibling.values[:DirtyminDegree-1], n.values[DirtyminDegree:]) + DirtyzeroValueSlice(n.values[DirtyminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:DirtyminDegree], n.children[DirtyminDegree:]) + DirtyzeroNodeSlice(n.children[DirtyminDegree:]) + for i := 0; i < DirtyminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = DirtyminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < DirtyminDegree { + return gap + } + return DirtyGapIterator{sibling, gap.index - DirtyminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *Dirtynode) rebalanceAfterRemove(gap DirtyGapIterator) DirtyGapIterator { + for { + if n.nrSegments >= DirtyminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= DirtyminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + dirtySetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return DirtyGapIterator{n, 0} + } + if gap.node == n { + return DirtyGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= DirtyminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + dirtySetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return DirtyGapIterator{n, n.nrSegments} + } + return DirtyGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return DirtyGapIterator{p, gap.index} + } + if gap.node == right { + return DirtyGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *Dirtynode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = DirtyGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + dirtySetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type DirtyIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *Dirtynode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg DirtyIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg DirtyIterator) Range() __generics_imported0.MappableRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg DirtyIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg DirtyIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg DirtyIterator) SetRangeUnchecked(r __generics_imported0.MappableRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg DirtyIterator) SetRange(r __generics_imported0.MappableRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg DirtyIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg DirtyIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg DirtyIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg DirtyIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg DirtyIterator) Value() DirtyInfo { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg DirtyIterator) ValuePtr() *DirtyInfo { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg DirtyIterator) SetValue(val DirtyInfo) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg DirtyIterator) PrevSegment() DirtyIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return DirtyIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return DirtyIterator{} + } + return DirtysegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg DirtyIterator) NextSegment() DirtyIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return DirtyIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return DirtyIterator{} + } + return DirtysegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg DirtyIterator) PrevGap() DirtyGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return DirtyGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg DirtyIterator) NextGap() DirtyGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return DirtyGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg DirtyIterator) PrevNonEmpty() (DirtyIterator, DirtyGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return DirtyIterator{}, gap + } + return gap.PrevSegment(), DirtyGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg DirtyIterator) NextNonEmpty() (DirtyIterator, DirtyGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return DirtyIterator{}, gap + } + return gap.NextSegment(), DirtyGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type DirtyGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *Dirtynode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap DirtyGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap DirtyGapIterator) Range() __generics_imported0.MappableRange { + return __generics_imported0.MappableRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap DirtyGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return dirtySetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap DirtyGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return dirtySetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap DirtyGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap DirtyGapIterator) PrevSegment() DirtyIterator { + return DirtysegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap DirtyGapIterator) NextSegment() DirtyIterator { + return DirtysegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap DirtyGapIterator) PrevGap() DirtyGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return DirtyGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap DirtyGapIterator) NextGap() DirtyGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return DirtyGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func DirtysegmentBeforePosition(n *Dirtynode, i int) DirtyIterator { + for i == 0 { + if n.parent == nil { + return DirtyIterator{} + } + n, i = n.parent, n.parentIndex + } + return DirtyIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func DirtysegmentAfterPosition(n *Dirtynode, i int) DirtyIterator { + for i == n.nrSegments { + if n.parent == nil { + return DirtyIterator{} + } + n, i = n.parent, n.parentIndex + } + return DirtyIterator{n, i} +} + +func DirtyzeroValueSlice(slice []DirtyInfo) { + + for i := range slice { + dirtySetFunctions{}.ClearValue(&slice[i]) + } +} + +func DirtyzeroNodeSlice(slice []*Dirtynode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *DirtySet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *Dirtynode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *Dirtynode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type DirtySegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []DirtyInfo +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *DirtySet) ExportSortedSlices() *DirtySegmentDataSlices { + var sds DirtySegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *DirtySet) ImportSortedSlices(sds *DirtySegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.MappableRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *DirtySet) saveRoot() *DirtySegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *DirtySet) loadRoot(sds *DirtySegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/fsutil/dirty_set_test.go b/pkg/sentry/fs/fsutil/dirty_set_test.go deleted file mode 100644 index 75575d994..000000000 --- a/pkg/sentry/fs/fsutil/dirty_set_test.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fsutil - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -func TestDirtySet(t *testing.T) { - var set DirtySet - set.MarkDirty(memmap.MappableRange{0, 2 * usermem.PageSize}) - set.KeepDirty(memmap.MappableRange{usermem.PageSize, 2 * usermem.PageSize}) - set.MarkClean(memmap.MappableRange{0, 2 * usermem.PageSize}) - want := &DirtySegmentDataSlices{ - Start: []uint64{usermem.PageSize}, - End: []uint64{2 * usermem.PageSize}, - Values: []DirtyInfo{{Keep: true}}, - } - if got := set.ExportSortedSlices(); !reflect.DeepEqual(got, want) { - t.Errorf("set:\n\tgot %v,\n\twant %v", got, want) - } -} diff --git a/pkg/sentry/fs/fsutil/file_range_set_impl.go b/pkg/sentry/fs/fsutil/file_range_set_impl.go new file mode 100755 index 000000000..0548bba08 --- /dev/null +++ b/pkg/sentry/fs/fsutil/file_range_set_impl.go @@ -0,0 +1,1274 @@ +package fsutil + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + FileRangeminDegree = 3 + + FileRangemaxDegree = 2 * FileRangeminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type FileRangeSet struct { + root FileRangenode `state:".(*FileRangeSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *FileRangeSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *FileRangeSet) IsEmptyRange(r __generics_imported0.MappableRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *FileRangeSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *FileRangeSet) SpanRange(r __generics_imported0.MappableRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *FileRangeSet) FirstSegment() FileRangeIterator { + if s.root.nrSegments == 0 { + return FileRangeIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *FileRangeSet) LastSegment() FileRangeIterator { + if s.root.nrSegments == 0 { + return FileRangeIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *FileRangeSet) FirstGap() FileRangeGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return FileRangeGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *FileRangeSet) LastGap() FileRangeGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return FileRangeGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *FileRangeSet) Find(key uint64) (FileRangeIterator, FileRangeGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return FileRangeIterator{n, i}, FileRangeGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return FileRangeIterator{}, FileRangeGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *FileRangeSet) FindSegment(key uint64) FileRangeIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *FileRangeSet) LowerBoundSegment(min uint64) FileRangeIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *FileRangeSet) UpperBoundSegment(max uint64) FileRangeIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *FileRangeSet) FindGap(key uint64) FileRangeGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *FileRangeSet) LowerBoundGap(min uint64) FileRangeGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *FileRangeSet) UpperBoundGap(max uint64) FileRangeGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *FileRangeSet) Add(r __generics_imported0.MappableRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *FileRangeSet) AddWithoutMerging(r __generics_imported0.MappableRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *FileRangeSet) Insert(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (fileRangeSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (fileRangeSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (fileRangeSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *FileRangeSet) InsertWithoutMerging(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *FileRangeSet) InsertWithoutMergingUnchecked(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return FileRangeIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *FileRangeSet) Remove(seg FileRangeIterator) FileRangeGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + fileRangeSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(FileRangeGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *FileRangeSet) RemoveAll() { + s.root = FileRangenode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *FileRangeSet) RemoveRange(r __generics_imported0.MappableRange) FileRangeGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *FileRangeSet) Merge(first, second FileRangeIterator) FileRangeIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *FileRangeSet) MergeUnchecked(first, second FileRangeIterator) FileRangeIterator { + if first.End() == second.Start() { + if mval, ok := (fileRangeSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return FileRangeIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *FileRangeSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *FileRangeSet) MergeRange(r __generics_imported0.MappableRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *FileRangeSet) MergeAdjacent(r __generics_imported0.MappableRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *FileRangeSet) Split(seg FileRangeIterator, split uint64) (FileRangeIterator, FileRangeIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *FileRangeSet) SplitUnchecked(seg FileRangeIterator, split uint64) (FileRangeIterator, FileRangeIterator) { + val1, val2 := (fileRangeSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.MappableRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *FileRangeSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *FileRangeSet) Isolate(seg FileRangeIterator, r __generics_imported0.MappableRange) FileRangeIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *FileRangeSet) ApplyContiguous(r __generics_imported0.MappableRange, fn func(seg FileRangeIterator)) FileRangeGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return FileRangeGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return FileRangeGapIterator{} + } + } +} + +// +stateify savable +type FileRangenode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *FileRangenode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [FileRangemaxDegree - 1]__generics_imported0.MappableRange + values [FileRangemaxDegree - 1]uint64 + children [FileRangemaxDegree]*FileRangenode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *FileRangenode) firstSegment() FileRangeIterator { + for n.hasChildren { + n = n.children[0] + } + return FileRangeIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *FileRangenode) lastSegment() FileRangeIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return FileRangeIterator{n, n.nrSegments - 1} +} + +func (n *FileRangenode) prevSibling() *FileRangenode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *FileRangenode) nextSibling() *FileRangenode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *FileRangenode) rebalanceBeforeInsert(gap FileRangeGapIterator) FileRangeGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < FileRangemaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &FileRangenode{ + nrSegments: FileRangeminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &FileRangenode{ + nrSegments: FileRangeminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:FileRangeminDegree-1], n.keys[:FileRangeminDegree-1]) + copy(left.values[:FileRangeminDegree-1], n.values[:FileRangeminDegree-1]) + copy(right.keys[:FileRangeminDegree-1], n.keys[FileRangeminDegree:]) + copy(right.values[:FileRangeminDegree-1], n.values[FileRangeminDegree:]) + n.keys[0], n.values[0] = n.keys[FileRangeminDegree-1], n.values[FileRangeminDegree-1] + FileRangezeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:FileRangeminDegree], n.children[:FileRangeminDegree]) + copy(right.children[:FileRangeminDegree], n.children[FileRangeminDegree:]) + FileRangezeroNodeSlice(n.children[2:]) + for i := 0; i < FileRangeminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < FileRangeminDegree { + return FileRangeGapIterator{left, gap.index} + } + return FileRangeGapIterator{right, gap.index - FileRangeminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[FileRangeminDegree-1], n.values[FileRangeminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &FileRangenode{ + nrSegments: FileRangeminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:FileRangeminDegree-1], n.keys[FileRangeminDegree:]) + copy(sibling.values[:FileRangeminDegree-1], n.values[FileRangeminDegree:]) + FileRangezeroValueSlice(n.values[FileRangeminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:FileRangeminDegree], n.children[FileRangeminDegree:]) + FileRangezeroNodeSlice(n.children[FileRangeminDegree:]) + for i := 0; i < FileRangeminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = FileRangeminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < FileRangeminDegree { + return gap + } + return FileRangeGapIterator{sibling, gap.index - FileRangeminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *FileRangenode) rebalanceAfterRemove(gap FileRangeGapIterator) FileRangeGapIterator { + for { + if n.nrSegments >= FileRangeminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= FileRangeminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + fileRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return FileRangeGapIterator{n, 0} + } + if gap.node == n { + return FileRangeGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= FileRangeminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + fileRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return FileRangeGapIterator{n, n.nrSegments} + } + return FileRangeGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return FileRangeGapIterator{p, gap.index} + } + if gap.node == right { + return FileRangeGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *FileRangenode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = FileRangeGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + fileRangeSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type FileRangeIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *FileRangenode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg FileRangeIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg FileRangeIterator) Range() __generics_imported0.MappableRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg FileRangeIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg FileRangeIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg FileRangeIterator) SetRangeUnchecked(r __generics_imported0.MappableRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg FileRangeIterator) SetRange(r __generics_imported0.MappableRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg FileRangeIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg FileRangeIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg FileRangeIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg FileRangeIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg FileRangeIterator) Value() uint64 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg FileRangeIterator) ValuePtr() *uint64 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg FileRangeIterator) SetValue(val uint64) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg FileRangeIterator) PrevSegment() FileRangeIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return FileRangeIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return FileRangeIterator{} + } + return FileRangesegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg FileRangeIterator) NextSegment() FileRangeIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return FileRangeIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return FileRangeIterator{} + } + return FileRangesegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg FileRangeIterator) PrevGap() FileRangeGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return FileRangeGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg FileRangeIterator) NextGap() FileRangeGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return FileRangeGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg FileRangeIterator) PrevNonEmpty() (FileRangeIterator, FileRangeGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return FileRangeIterator{}, gap + } + return gap.PrevSegment(), FileRangeGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg FileRangeIterator) NextNonEmpty() (FileRangeIterator, FileRangeGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return FileRangeIterator{}, gap + } + return gap.NextSegment(), FileRangeGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type FileRangeGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *FileRangenode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap FileRangeGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap FileRangeGapIterator) Range() __generics_imported0.MappableRange { + return __generics_imported0.MappableRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap FileRangeGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return fileRangeSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap FileRangeGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return fileRangeSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap FileRangeGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap FileRangeGapIterator) PrevSegment() FileRangeIterator { + return FileRangesegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap FileRangeGapIterator) NextSegment() FileRangeIterator { + return FileRangesegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap FileRangeGapIterator) PrevGap() FileRangeGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return FileRangeGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap FileRangeGapIterator) NextGap() FileRangeGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return FileRangeGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func FileRangesegmentBeforePosition(n *FileRangenode, i int) FileRangeIterator { + for i == 0 { + if n.parent == nil { + return FileRangeIterator{} + } + n, i = n.parent, n.parentIndex + } + return FileRangeIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func FileRangesegmentAfterPosition(n *FileRangenode, i int) FileRangeIterator { + for i == n.nrSegments { + if n.parent == nil { + return FileRangeIterator{} + } + n, i = n.parent, n.parentIndex + } + return FileRangeIterator{n, i} +} + +func FileRangezeroValueSlice(slice []uint64) { + + for i := range slice { + fileRangeSetFunctions{}.ClearValue(&slice[i]) + } +} + +func FileRangezeroNodeSlice(slice []*FileRangenode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *FileRangeSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *FileRangenode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *FileRangenode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type FileRangeSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []uint64 +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *FileRangeSet) ExportSortedSlices() *FileRangeSegmentDataSlices { + var sds FileRangeSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *FileRangeSet) ImportSortedSlices(sds *FileRangeSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.MappableRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *FileRangeSet) saveRoot() *FileRangeSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *FileRangeSet) loadRoot(sds *FileRangeSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/fsutil/frame_ref_set_impl.go b/pkg/sentry/fs/fsutil/frame_ref_set_impl.go new file mode 100755 index 000000000..d4601bffa --- /dev/null +++ b/pkg/sentry/fs/fsutil/frame_ref_set_impl.go @@ -0,0 +1,1274 @@ +package fsutil + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/platform" +) + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + frameRefminDegree = 3 + + frameRefmaxDegree = 2 * frameRefminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type frameRefSet struct { + root frameRefnode `state:".(*frameRefSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *frameRefSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *frameRefSet) IsEmptyRange(r __generics_imported0.FileRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *frameRefSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *frameRefSet) SpanRange(r __generics_imported0.FileRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *frameRefSet) FirstSegment() frameRefIterator { + if s.root.nrSegments == 0 { + return frameRefIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *frameRefSet) LastSegment() frameRefIterator { + if s.root.nrSegments == 0 { + return frameRefIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *frameRefSet) FirstGap() frameRefGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return frameRefGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *frameRefSet) LastGap() frameRefGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return frameRefGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *frameRefSet) Find(key uint64) (frameRefIterator, frameRefGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return frameRefIterator{n, i}, frameRefGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return frameRefIterator{}, frameRefGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *frameRefSet) FindSegment(key uint64) frameRefIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *frameRefSet) LowerBoundSegment(min uint64) frameRefIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *frameRefSet) UpperBoundSegment(max uint64) frameRefIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *frameRefSet) FindGap(key uint64) frameRefGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *frameRefSet) LowerBoundGap(min uint64) frameRefGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *frameRefSet) UpperBoundGap(max uint64) frameRefGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *frameRefSet) Add(r __generics_imported0.FileRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *frameRefSet) AddWithoutMerging(r __generics_imported0.FileRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *frameRefSet) Insert(gap frameRefGapIterator, r __generics_imported0.FileRange, val uint64) frameRefIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (frameRefSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (frameRefSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (frameRefSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *frameRefSet) InsertWithoutMerging(gap frameRefGapIterator, r __generics_imported0.FileRange, val uint64) frameRefIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *frameRefSet) InsertWithoutMergingUnchecked(gap frameRefGapIterator, r __generics_imported0.FileRange, val uint64) frameRefIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return frameRefIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *frameRefSet) Remove(seg frameRefIterator) frameRefGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + frameRefSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(frameRefGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *frameRefSet) RemoveAll() { + s.root = frameRefnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *frameRefSet) RemoveRange(r __generics_imported0.FileRange) frameRefGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *frameRefSet) Merge(first, second frameRefIterator) frameRefIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *frameRefSet) MergeUnchecked(first, second frameRefIterator) frameRefIterator { + if first.End() == second.Start() { + if mval, ok := (frameRefSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return frameRefIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *frameRefSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *frameRefSet) MergeRange(r __generics_imported0.FileRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *frameRefSet) MergeAdjacent(r __generics_imported0.FileRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *frameRefSet) Split(seg frameRefIterator, split uint64) (frameRefIterator, frameRefIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *frameRefSet) SplitUnchecked(seg frameRefIterator, split uint64) (frameRefIterator, frameRefIterator) { + val1, val2 := (frameRefSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.FileRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *frameRefSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *frameRefSet) Isolate(seg frameRefIterator, r __generics_imported0.FileRange) frameRefIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *frameRefSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg frameRefIterator)) frameRefGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return frameRefGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return frameRefGapIterator{} + } + } +} + +// +stateify savable +type frameRefnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *frameRefnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [frameRefmaxDegree - 1]__generics_imported0.FileRange + values [frameRefmaxDegree - 1]uint64 + children [frameRefmaxDegree]*frameRefnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *frameRefnode) firstSegment() frameRefIterator { + for n.hasChildren { + n = n.children[0] + } + return frameRefIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *frameRefnode) lastSegment() frameRefIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return frameRefIterator{n, n.nrSegments - 1} +} + +func (n *frameRefnode) prevSibling() *frameRefnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *frameRefnode) nextSibling() *frameRefnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *frameRefnode) rebalanceBeforeInsert(gap frameRefGapIterator) frameRefGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < frameRefmaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &frameRefnode{ + nrSegments: frameRefminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &frameRefnode{ + nrSegments: frameRefminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:frameRefminDegree-1], n.keys[:frameRefminDegree-1]) + copy(left.values[:frameRefminDegree-1], n.values[:frameRefminDegree-1]) + copy(right.keys[:frameRefminDegree-1], n.keys[frameRefminDegree:]) + copy(right.values[:frameRefminDegree-1], n.values[frameRefminDegree:]) + n.keys[0], n.values[0] = n.keys[frameRefminDegree-1], n.values[frameRefminDegree-1] + frameRefzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:frameRefminDegree], n.children[:frameRefminDegree]) + copy(right.children[:frameRefminDegree], n.children[frameRefminDegree:]) + frameRefzeroNodeSlice(n.children[2:]) + for i := 0; i < frameRefminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < frameRefminDegree { + return frameRefGapIterator{left, gap.index} + } + return frameRefGapIterator{right, gap.index - frameRefminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[frameRefminDegree-1], n.values[frameRefminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &frameRefnode{ + nrSegments: frameRefminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:frameRefminDegree-1], n.keys[frameRefminDegree:]) + copy(sibling.values[:frameRefminDegree-1], n.values[frameRefminDegree:]) + frameRefzeroValueSlice(n.values[frameRefminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:frameRefminDegree], n.children[frameRefminDegree:]) + frameRefzeroNodeSlice(n.children[frameRefminDegree:]) + for i := 0; i < frameRefminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = frameRefminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < frameRefminDegree { + return gap + } + return frameRefGapIterator{sibling, gap.index - frameRefminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *frameRefnode) rebalanceAfterRemove(gap frameRefGapIterator) frameRefGapIterator { + for { + if n.nrSegments >= frameRefminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= frameRefminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + frameRefSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return frameRefGapIterator{n, 0} + } + if gap.node == n { + return frameRefGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= frameRefminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + frameRefSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return frameRefGapIterator{n, n.nrSegments} + } + return frameRefGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return frameRefGapIterator{p, gap.index} + } + if gap.node == right { + return frameRefGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *frameRefnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = frameRefGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + frameRefSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type frameRefIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *frameRefnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg frameRefIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg frameRefIterator) Range() __generics_imported0.FileRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg frameRefIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg frameRefIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg frameRefIterator) SetRangeUnchecked(r __generics_imported0.FileRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg frameRefIterator) SetRange(r __generics_imported0.FileRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg frameRefIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg frameRefIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg frameRefIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg frameRefIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg frameRefIterator) Value() uint64 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg frameRefIterator) ValuePtr() *uint64 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg frameRefIterator) SetValue(val uint64) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg frameRefIterator) PrevSegment() frameRefIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return frameRefIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return frameRefIterator{} + } + return frameRefsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg frameRefIterator) NextSegment() frameRefIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return frameRefIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return frameRefIterator{} + } + return frameRefsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg frameRefIterator) PrevGap() frameRefGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return frameRefGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg frameRefIterator) NextGap() frameRefGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return frameRefGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg frameRefIterator) PrevNonEmpty() (frameRefIterator, frameRefGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return frameRefIterator{}, gap + } + return gap.PrevSegment(), frameRefGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg frameRefIterator) NextNonEmpty() (frameRefIterator, frameRefGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return frameRefIterator{}, gap + } + return gap.NextSegment(), frameRefGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type frameRefGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *frameRefnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap frameRefGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap frameRefGapIterator) Range() __generics_imported0.FileRange { + return __generics_imported0.FileRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap frameRefGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return frameRefSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap frameRefGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return frameRefSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap frameRefGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap frameRefGapIterator) PrevSegment() frameRefIterator { + return frameRefsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap frameRefGapIterator) NextSegment() frameRefIterator { + return frameRefsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap frameRefGapIterator) PrevGap() frameRefGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return frameRefGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap frameRefGapIterator) NextGap() frameRefGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return frameRefGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func frameRefsegmentBeforePosition(n *frameRefnode, i int) frameRefIterator { + for i == 0 { + if n.parent == nil { + return frameRefIterator{} + } + n, i = n.parent, n.parentIndex + } + return frameRefIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func frameRefsegmentAfterPosition(n *frameRefnode, i int) frameRefIterator { + for i == n.nrSegments { + if n.parent == nil { + return frameRefIterator{} + } + n, i = n.parent, n.parentIndex + } + return frameRefIterator{n, i} +} + +func frameRefzeroValueSlice(slice []uint64) { + + for i := range slice { + frameRefSetFunctions{}.ClearValue(&slice[i]) + } +} + +func frameRefzeroNodeSlice(slice []*frameRefnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *frameRefSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *frameRefnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *frameRefnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type frameRefSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []uint64 +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *frameRefSet) ExportSortedSlices() *frameRefSegmentDataSlices { + var sds frameRefSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *frameRefSet) ImportSortedSlices(sds *frameRefSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.FileRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *frameRefSet) saveRoot() *frameRefSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *frameRefSet) loadRoot(sds *frameRefSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/fsutil/fsutil_state_autogen.go b/pkg/sentry/fs/fsutil/fsutil_state_autogen.go new file mode 100755 index 000000000..6371a66a5 --- /dev/null +++ b/pkg/sentry/fs/fsutil/fsutil_state_autogen.go @@ -0,0 +1,349 @@ +// automatically generated by stateify. + +package fsutil + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *DirtyInfo) beforeSave() {} +func (x *DirtyInfo) save(m state.Map) { + x.beforeSave() + m.Save("Keep", &x.Keep) +} + +func (x *DirtyInfo) afterLoad() {} +func (x *DirtyInfo) load(m state.Map) { + m.Load("Keep", &x.Keep) +} + +func (x *DirtySet) beforeSave() {} +func (x *DirtySet) save(m state.Map) { + x.beforeSave() + var root *DirtySegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *DirtySet) afterLoad() {} +func (x *DirtySet) load(m state.Map) { + m.LoadValue("root", new(*DirtySegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*DirtySegmentDataSlices)) }) +} + +func (x *Dirtynode) beforeSave() {} +func (x *Dirtynode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *Dirtynode) afterLoad() {} +func (x *Dirtynode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *DirtySegmentDataSlices) beforeSave() {} +func (x *DirtySegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *DirtySegmentDataSlices) afterLoad() {} +func (x *DirtySegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func (x *StaticDirFileOperations) beforeSave() {} +func (x *StaticDirFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("dentryMap", &x.dentryMap) + m.Save("dirCursor", &x.dirCursor) +} + +func (x *StaticDirFileOperations) afterLoad() {} +func (x *StaticDirFileOperations) load(m state.Map) { + m.Load("dentryMap", &x.dentryMap) + m.Load("dirCursor", &x.dirCursor) +} + +func (x *NoReadWriteFile) beforeSave() {} +func (x *NoReadWriteFile) save(m state.Map) { + x.beforeSave() +} + +func (x *NoReadWriteFile) afterLoad() {} +func (x *NoReadWriteFile) load(m state.Map) { +} + +func (x *FileStaticContentReader) beforeSave() {} +func (x *FileStaticContentReader) save(m state.Map) { + x.beforeSave() + m.Save("content", &x.content) +} + +func (x *FileStaticContentReader) afterLoad() {} +func (x *FileStaticContentReader) load(m state.Map) { + m.Load("content", &x.content) +} + +func (x *FileRangeSet) beforeSave() {} +func (x *FileRangeSet) save(m state.Map) { + x.beforeSave() + var root *FileRangeSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *FileRangeSet) afterLoad() {} +func (x *FileRangeSet) load(m state.Map) { + m.LoadValue("root", new(*FileRangeSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*FileRangeSegmentDataSlices)) }) +} + +func (x *FileRangenode) beforeSave() {} +func (x *FileRangenode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *FileRangenode) afterLoad() {} +func (x *FileRangenode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *FileRangeSegmentDataSlices) beforeSave() {} +func (x *FileRangeSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *FileRangeSegmentDataSlices) afterLoad() {} +func (x *FileRangeSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func (x *frameRefSet) beforeSave() {} +func (x *frameRefSet) save(m state.Map) { + x.beforeSave() + var root *frameRefSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *frameRefSet) afterLoad() {} +func (x *frameRefSet) load(m state.Map) { + m.LoadValue("root", new(*frameRefSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*frameRefSegmentDataSlices)) }) +} + +func (x *frameRefnode) beforeSave() {} +func (x *frameRefnode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *frameRefnode) afterLoad() {} +func (x *frameRefnode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *frameRefSegmentDataSlices) beforeSave() {} +func (x *frameRefSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *frameRefSegmentDataSlices) afterLoad() {} +func (x *frameRefSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func (x *HostFileMapper) beforeSave() {} +func (x *HostFileMapper) save(m state.Map) { + x.beforeSave() + m.Save("refs", &x.refs) +} + +func (x *HostFileMapper) load(m state.Map) { + m.Load("refs", &x.refs) + m.AfterLoad(x.afterLoad) +} + +func (x *HostMappable) beforeSave() {} +func (x *HostMappable) save(m state.Map) { + x.beforeSave() + m.Save("hostFileMapper", &x.hostFileMapper) + m.Save("backingFile", &x.backingFile) + m.Save("mappings", &x.mappings) +} + +func (x *HostMappable) afterLoad() {} +func (x *HostMappable) load(m state.Map) { + m.Load("hostFileMapper", &x.hostFileMapper) + m.Load("backingFile", &x.backingFile) + m.Load("mappings", &x.mappings) +} + +func (x *SimpleFileInode) beforeSave() {} +func (x *SimpleFileInode) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *SimpleFileInode) afterLoad() {} +func (x *SimpleFileInode) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *NoReadWriteFileInode) beforeSave() {} +func (x *NoReadWriteFileInode) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *NoReadWriteFileInode) afterLoad() {} +func (x *NoReadWriteFileInode) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) +} + +func (x *InodeSimpleAttributes) beforeSave() {} +func (x *InodeSimpleAttributes) save(m state.Map) { + x.beforeSave() + m.Save("fsType", &x.fsType) + m.Save("unstable", &x.unstable) +} + +func (x *InodeSimpleAttributes) afterLoad() {} +func (x *InodeSimpleAttributes) load(m state.Map) { + m.Load("fsType", &x.fsType) + m.Load("unstable", &x.unstable) +} + +func (x *InodeSimpleExtendedAttributes) beforeSave() {} +func (x *InodeSimpleExtendedAttributes) save(m state.Map) { + x.beforeSave() + m.Save("xattrs", &x.xattrs) +} + +func (x *InodeSimpleExtendedAttributes) afterLoad() {} +func (x *InodeSimpleExtendedAttributes) load(m state.Map) { + m.Load("xattrs", &x.xattrs) +} + +func (x *staticFile) beforeSave() {} +func (x *staticFile) save(m state.Map) { + x.beforeSave() + m.Save("FileStaticContentReader", &x.FileStaticContentReader) +} + +func (x *staticFile) afterLoad() {} +func (x *staticFile) load(m state.Map) { + m.Load("FileStaticContentReader", &x.FileStaticContentReader) +} + +func (x *InodeStaticFileGetter) beforeSave() {} +func (x *InodeStaticFileGetter) save(m state.Map) { + x.beforeSave() + m.Save("Contents", &x.Contents) +} + +func (x *InodeStaticFileGetter) afterLoad() {} +func (x *InodeStaticFileGetter) load(m state.Map) { + m.Load("Contents", &x.Contents) +} + +func (x *CachingInodeOperations) beforeSave() {} +func (x *CachingInodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("backingFile", &x.backingFile) + m.Save("mfp", &x.mfp) + m.Save("forcePageCache", &x.forcePageCache) + m.Save("attr", &x.attr) + m.Save("dirtyAttr", &x.dirtyAttr) + m.Save("mappings", &x.mappings) + m.Save("cache", &x.cache) + m.Save("dirty", &x.dirty) + m.Save("hostFileMapper", &x.hostFileMapper) + m.Save("refs", &x.refs) +} + +func (x *CachingInodeOperations) afterLoad() {} +func (x *CachingInodeOperations) load(m state.Map) { + m.Load("backingFile", &x.backingFile) + m.Load("mfp", &x.mfp) + m.Load("forcePageCache", &x.forcePageCache) + m.Load("attr", &x.attr) + m.Load("dirtyAttr", &x.dirtyAttr) + m.Load("mappings", &x.mappings) + m.Load("cache", &x.cache) + m.Load("dirty", &x.dirty) + m.Load("hostFileMapper", &x.hostFileMapper) + m.Load("refs", &x.refs) +} + +func init() { + state.Register("fsutil.DirtyInfo", (*DirtyInfo)(nil), state.Fns{Save: (*DirtyInfo).save, Load: (*DirtyInfo).load}) + state.Register("fsutil.DirtySet", (*DirtySet)(nil), state.Fns{Save: (*DirtySet).save, Load: (*DirtySet).load}) + state.Register("fsutil.Dirtynode", (*Dirtynode)(nil), state.Fns{Save: (*Dirtynode).save, Load: (*Dirtynode).load}) + state.Register("fsutil.DirtySegmentDataSlices", (*DirtySegmentDataSlices)(nil), state.Fns{Save: (*DirtySegmentDataSlices).save, Load: (*DirtySegmentDataSlices).load}) + state.Register("fsutil.StaticDirFileOperations", (*StaticDirFileOperations)(nil), state.Fns{Save: (*StaticDirFileOperations).save, Load: (*StaticDirFileOperations).load}) + state.Register("fsutil.NoReadWriteFile", (*NoReadWriteFile)(nil), state.Fns{Save: (*NoReadWriteFile).save, Load: (*NoReadWriteFile).load}) + state.Register("fsutil.FileStaticContentReader", (*FileStaticContentReader)(nil), state.Fns{Save: (*FileStaticContentReader).save, Load: (*FileStaticContentReader).load}) + state.Register("fsutil.FileRangeSet", (*FileRangeSet)(nil), state.Fns{Save: (*FileRangeSet).save, Load: (*FileRangeSet).load}) + state.Register("fsutil.FileRangenode", (*FileRangenode)(nil), state.Fns{Save: (*FileRangenode).save, Load: (*FileRangenode).load}) + state.Register("fsutil.FileRangeSegmentDataSlices", (*FileRangeSegmentDataSlices)(nil), state.Fns{Save: (*FileRangeSegmentDataSlices).save, Load: (*FileRangeSegmentDataSlices).load}) + state.Register("fsutil.frameRefSet", (*frameRefSet)(nil), state.Fns{Save: (*frameRefSet).save, Load: (*frameRefSet).load}) + state.Register("fsutil.frameRefnode", (*frameRefnode)(nil), state.Fns{Save: (*frameRefnode).save, Load: (*frameRefnode).load}) + state.Register("fsutil.frameRefSegmentDataSlices", (*frameRefSegmentDataSlices)(nil), state.Fns{Save: (*frameRefSegmentDataSlices).save, Load: (*frameRefSegmentDataSlices).load}) + state.Register("fsutil.HostFileMapper", (*HostFileMapper)(nil), state.Fns{Save: (*HostFileMapper).save, Load: (*HostFileMapper).load}) + state.Register("fsutil.HostMappable", (*HostMappable)(nil), state.Fns{Save: (*HostMappable).save, Load: (*HostMappable).load}) + state.Register("fsutil.SimpleFileInode", (*SimpleFileInode)(nil), state.Fns{Save: (*SimpleFileInode).save, Load: (*SimpleFileInode).load}) + state.Register("fsutil.NoReadWriteFileInode", (*NoReadWriteFileInode)(nil), state.Fns{Save: (*NoReadWriteFileInode).save, Load: (*NoReadWriteFileInode).load}) + state.Register("fsutil.InodeSimpleAttributes", (*InodeSimpleAttributes)(nil), state.Fns{Save: (*InodeSimpleAttributes).save, Load: (*InodeSimpleAttributes).load}) + state.Register("fsutil.InodeSimpleExtendedAttributes", (*InodeSimpleExtendedAttributes)(nil), state.Fns{Save: (*InodeSimpleExtendedAttributes).save, Load: (*InodeSimpleExtendedAttributes).load}) + state.Register("fsutil.staticFile", (*staticFile)(nil), state.Fns{Save: (*staticFile).save, Load: (*staticFile).load}) + state.Register("fsutil.InodeStaticFileGetter", (*InodeStaticFileGetter)(nil), state.Fns{Save: (*InodeStaticFileGetter).save, Load: (*InodeStaticFileGetter).load}) + state.Register("fsutil.CachingInodeOperations", (*CachingInodeOperations)(nil), state.Fns{Save: (*CachingInodeOperations).save, Load: (*CachingInodeOperations).load}) +} diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go deleted file mode 100644 index dc19255ed..000000000 --- a/pkg/sentry/fs/fsutil/inode_cached_test.go +++ /dev/null @@ -1,389 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fsutil - -import ( - "bytes" - "io" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserror" -) - -type noopBackingFile struct{} - -func (noopBackingFile) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) { - return dsts.NumBytes(), nil -} - -func (noopBackingFile) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) { - return srcs.NumBytes(), nil -} - -func (noopBackingFile) SetMaskedAttributes(context.Context, fs.AttrMask, fs.UnstableAttr) error { - return nil -} - -func (noopBackingFile) Sync(context.Context) error { - return nil -} - -func (noopBackingFile) FD() int { - return -1 -} - -func (noopBackingFile) Allocate(ctx context.Context, offset int64, length int64) error { - return nil -} - -func TestSetPermissions(t *testing.T) { - ctx := contexttest.Context(t) - - uattr := fs.WithCurrentTime(ctx, fs.UnstableAttr{ - Perms: fs.FilePermsFromMode(0444), - }) - iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, false /*forcePageCache*/) - defer iops.Release() - - perms := fs.FilePermsFromMode(0777) - if !iops.SetPermissions(ctx, nil, perms) { - t.Fatalf("SetPermissions failed, want success") - } - - // Did permissions change? - if iops.attr.Perms != perms { - t.Fatalf("got perms +%v, want +%v", iops.attr.Perms, perms) - } - - // Did status change time change? - if !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("got status change time not dirty, want dirty") - } - if iops.attr.StatusChangeTime.Equal(uattr.StatusChangeTime) { - t.Fatalf("got status change time unchanged") - } -} - -func TestSetTimestamps(t *testing.T) { - ctx := contexttest.Context(t) - for _, test := range []struct { - desc string - ts fs.TimeSpec - wantChanged fs.AttrMask - }{ - { - desc: "noop", - ts: fs.TimeSpec{ - ATimeOmit: true, - MTimeOmit: true, - }, - wantChanged: fs.AttrMask{}, - }, - { - desc: "access time only", - ts: fs.TimeSpec{ - ATime: ktime.NowFromContext(ctx), - MTimeOmit: true, - }, - wantChanged: fs.AttrMask{ - AccessTime: true, - }, - }, - { - desc: "modification time only", - ts: fs.TimeSpec{ - ATimeOmit: true, - MTime: ktime.NowFromContext(ctx), - }, - wantChanged: fs.AttrMask{ - ModificationTime: true, - }, - }, - { - desc: "access and modification time", - ts: fs.TimeSpec{ - ATime: ktime.NowFromContext(ctx), - MTime: ktime.NowFromContext(ctx), - }, - wantChanged: fs.AttrMask{ - AccessTime: true, - ModificationTime: true, - }, - }, - { - desc: "system time access and modification time", - ts: fs.TimeSpec{ - ATimeSetSystemTime: true, - MTimeSetSystemTime: true, - }, - wantChanged: fs.AttrMask{ - AccessTime: true, - ModificationTime: true, - }, - }, - } { - t.Run(test.desc, func(t *testing.T) { - ctx := contexttest.Context(t) - - epoch := ktime.ZeroTime - uattr := fs.UnstableAttr{ - AccessTime: epoch, - ModificationTime: epoch, - StatusChangeTime: epoch, - } - iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, false /*forcePageCache*/) - defer iops.Release() - - if err := iops.SetTimestamps(ctx, nil, test.ts); err != nil { - t.Fatalf("SetTimestamps got error %v, want nil", err) - } - if test.wantChanged.AccessTime { - if !iops.attr.AccessTime.After(uattr.AccessTime) { - t.Fatalf("diritied access time did not advance, want %v > %v", iops.attr.AccessTime, uattr.AccessTime) - } - if !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("dirty access time requires dirty status change time") - } - if !iops.attr.StatusChangeTime.After(uattr.StatusChangeTime) { - t.Fatalf("dirtied status change time did not advance") - } - } - if test.wantChanged.ModificationTime { - if !iops.attr.ModificationTime.After(uattr.ModificationTime) { - t.Fatalf("diritied modification time did not advance") - } - if !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("dirty modification time requires dirty status change time") - } - if !iops.attr.StatusChangeTime.After(uattr.StatusChangeTime) { - t.Fatalf("dirtied status change time did not advance") - } - } - }) - } -} - -func TestTruncate(t *testing.T) { - ctx := contexttest.Context(t) - - uattr := fs.UnstableAttr{ - Size: 0, - } - iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, false /*forcePageCache*/) - defer iops.Release() - - if err := iops.Truncate(ctx, nil, uattr.Size); err != nil { - t.Fatalf("Truncate got error %v, want nil", err) - } - var size int64 = 4096 - if err := iops.Truncate(ctx, nil, size); err != nil { - t.Fatalf("Truncate got error %v, want nil", err) - } - if iops.attr.Size != size { - t.Fatalf("Truncate got %d, want %d", iops.attr.Size, size) - } - if !iops.dirtyAttr.ModificationTime || !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("Truncate did not dirty modification and status change time") - } - if !iops.attr.ModificationTime.After(uattr.ModificationTime) { - t.Fatalf("dirtied modification time did not change") - } - if !iops.attr.StatusChangeTime.After(uattr.StatusChangeTime) { - t.Fatalf("dirtied status change time did not change") - } -} - -type sliceBackingFile struct { - data []byte -} - -func newSliceBackingFile(data []byte) *sliceBackingFile { - return &sliceBackingFile{data} -} - -func (f *sliceBackingFile) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) { - r := safemem.BlockSeqReader{safemem.BlockSeqOf(safemem.BlockFromSafeSlice(f.data)).DropFirst64(offset)} - return r.ReadToBlocks(dsts) -} - -func (f *sliceBackingFile) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) { - w := safemem.BlockSeqWriter{safemem.BlockSeqOf(safemem.BlockFromSafeSlice(f.data)).DropFirst64(offset)} - return w.WriteFromBlocks(srcs) -} - -func (*sliceBackingFile) SetMaskedAttributes(context.Context, fs.AttrMask, fs.UnstableAttr) error { - return nil -} - -func (*sliceBackingFile) Sync(context.Context) error { - return nil -} - -func (*sliceBackingFile) FD() int { - return -1 -} - -func (f *sliceBackingFile) Allocate(ctx context.Context, offset int64, length int64) error { - return syserror.EOPNOTSUPP -} - -type noopMappingSpace struct{} - -// Invalidate implements memmap.MappingSpace.Invalidate. -func (noopMappingSpace) Invalidate(ar usermem.AddrRange, opts memmap.InvalidateOpts) { -} - -func anonInode(ctx context.Context) *fs.Inode { - return fs.NewInode(ctx, &SimpleFileInode{ - InodeSimpleAttributes: NewInodeSimpleAttributes(ctx, fs.FileOwnerFromContext(ctx), fs.FilePermissions{ - User: fs.PermMask{Read: true, Write: true}, - }, 0), - }, fs.NewPseudoMountSource(ctx), fs.StableAttr{ - Type: fs.Anonymous, - BlockSize: usermem.PageSize, - }) -} - -func pagesOf(bs ...byte) []byte { - buf := make([]byte, 0, len(bs)*usermem.PageSize) - for _, b := range bs { - buf = append(buf, bytes.Repeat([]byte{b}, usermem.PageSize)...) - } - return buf -} - -func TestRead(t *testing.T) { - ctx := contexttest.Context(t) - - // Construct a 3-page file. - buf := pagesOf('a', 'b', 'c') - file := fs.NewFile(ctx, fs.NewDirent(ctx, anonInode(ctx), "anon"), fs.FileFlags{}, nil) - uattr := fs.UnstableAttr{ - Size: int64(len(buf)), - } - iops := NewCachingInodeOperations(ctx, newSliceBackingFile(buf), uattr, false /*forcePageCache*/) - defer iops.Release() - - // Expect the cache to be initially empty. - if cached := iops.cache.Span(); cached != 0 { - t.Errorf("Span got %d, want 0", cached) - } - - // Create a memory mapping of the second page (as CachingInodeOperations - // expects to only cache mapped pages), then call Translate to force it to - // be cached. - var ms noopMappingSpace - ar := usermem.AddrRange{usermem.PageSize, 2 * usermem.PageSize} - if err := iops.AddMapping(ctx, ms, ar, usermem.PageSize, true); err != nil { - t.Fatalf("AddMapping got %v, want nil", err) - } - mr := memmap.MappableRange{usermem.PageSize, 2 * usermem.PageSize} - if _, err := iops.Translate(ctx, mr, mr, usermem.Read); err != nil { - t.Fatalf("Translate got %v, want nil", err) - } - if cached := iops.cache.Span(); cached != usermem.PageSize { - t.Errorf("SpanRange got %d, want %d", cached, usermem.PageSize) - } - - // Try to read 4 pages. The first and third pages should be read directly - // from the "file", the second page should be read from the cache, and only - // 3 pages (the size of the file) should be readable. - rbuf := make([]byte, 4*usermem.PageSize) - dst := usermem.BytesIOSequence(rbuf) - n, err := iops.Read(ctx, file, dst, 0) - if n != 3*usermem.PageSize || (err != nil && err != io.EOF) { - t.Fatalf("Read got (%d, %v), want (%d, nil or EOF)", n, err, 3*usermem.PageSize) - } - rbuf = rbuf[:3*usermem.PageSize] - - // Did we get the bytes we expect? - if !bytes.Equal(rbuf, buf) { - t.Errorf("Read back bytes %v, want %v", rbuf, buf) - } - - // Delete the memory mapping before iops.Release(). The cached page will - // either be evicted by ctx's pgalloc.MemoryFile, or dropped by - // iops.Release(). - iops.RemoveMapping(ctx, ms, ar, usermem.PageSize, true) -} - -func TestWrite(t *testing.T) { - ctx := contexttest.Context(t) - - // Construct a 4-page file. - buf := pagesOf('a', 'b', 'c', 'd') - orig := append([]byte(nil), buf...) - inode := anonInode(ctx) - uattr := fs.UnstableAttr{ - Size: int64(len(buf)), - } - iops := NewCachingInodeOperations(ctx, newSliceBackingFile(buf), uattr, false /*forcePageCache*/) - defer iops.Release() - - // Expect the cache to be initially empty. - if cached := iops.cache.Span(); cached != 0 { - t.Errorf("Span got %d, want 0", cached) - } - - // Create a memory mapping of the second and third pages (as - // CachingInodeOperations expects to only cache mapped pages), then call - // Translate to force them to be cached. - var ms noopMappingSpace - ar := usermem.AddrRange{usermem.PageSize, 3 * usermem.PageSize} - if err := iops.AddMapping(ctx, ms, ar, usermem.PageSize, true); err != nil { - t.Fatalf("AddMapping got %v, want nil", err) - } - defer iops.RemoveMapping(ctx, ms, ar, usermem.PageSize, true) - mr := memmap.MappableRange{usermem.PageSize, 3 * usermem.PageSize} - if _, err := iops.Translate(ctx, mr, mr, usermem.Read); err != nil { - t.Fatalf("Translate got %v, want nil", err) - } - if cached := iops.cache.Span(); cached != 2*usermem.PageSize { - t.Errorf("SpanRange got %d, want %d", cached, 2*usermem.PageSize) - } - - // Write to the first 2 pages. - wbuf := pagesOf('e', 'f') - src := usermem.BytesIOSequence(wbuf) - n, err := iops.Write(ctx, src, 0) - if n != 2*usermem.PageSize || err != nil { - t.Fatalf("Write got (%d, %v), want (%d, nil)", n, err, 2*usermem.PageSize) - } - - // The first page should have been written directly, since it was not cached. - want := append([]byte(nil), orig...) - copy(want, pagesOf('e')) - if !bytes.Equal(buf, want) { - t.Errorf("File contents are %v, want %v", buf, want) - } - - // Sync back to the "backing file". - if err := iops.WriteOut(ctx, inode); err != nil { - t.Errorf("Sync got %v, want nil", err) - } - - // Now the second page should have been written as well. - copy(want[usermem.PageSize:], pagesOf('f')) - if !bytes.Equal(buf, want) { - t.Errorf("File contents are %v, want %v", buf, want) - } -} diff --git a/pkg/sentry/fs/g3doc/inotify.md b/pkg/sentry/fs/g3doc/inotify.md deleted file mode 100644 index 71a577d9d..000000000 --- a/pkg/sentry/fs/g3doc/inotify.md +++ /dev/null @@ -1,122 +0,0 @@ -# Inotify - -Inotify implements the like-named filesystem event notification system for the -sentry, see `inotify(7)`. - -## Architecture - -For the most part, the sentry implementation of inotify mirrors the Linux -architecture. Inotify instances (i.e. the fd returned by inotify_init(2)) are -backed by a pseudo-filesystem. Events are generated from various places in the -sentry, including the [syscall layer][syscall_dir], the [vfs layer][dirent] and -the [process fd table][fd_table]. Watches are stored in inodes and generated -events are queued to the inotify instance owning the watches for delivery to the -user. - -## Objects - -Here is a brief description of the existing and new objects involved in the -sentry inotify mechanism, and how they interact: - -### [`fs.Inotify`][inotify] - -- An inotify instances, created by inotify_init(2)/inotify_init1(2). -- The inotify fd has a `fs.Dirent`, supports filesystem syscalls to read - events. -- Has multiple `fs.Watch`es, with at most one watch per target inode, per - inotify instance. -- Has an instance `id` which is globally unique. This is *not* the fd number - for this instance, since the fd can be duped. This `id` is not externally - visible. - -### [`fs.Watch`][watch] - -- An inotify watch, created/deleted by - inotify_add_watch(2)/inotify_rm_watch(2). -- Owned by an `fs.Inotify` instance, each watch keeps a pointer to the - `owner`. -- Associated with a single `fs.Inode`, which is the watch `target`. While the - watch is active, it indirectly pins `target` to memory. See the "Reference - Model" section for a detailed explanation. -- Filesystem operations on `target` generate `fs.Event`s. - -### [`fs.Event`][event] - -- A simple struct encapsulating all the fields for an inotify event. -- Generated by `fs.Watch`es and forwarded to the watches' `owner`s. -- Serialized to the user during read(2) syscalls on the associated - `fs.Inotify`'s fd. - -### [`fs.Dirent`][dirent] - -- Many inotify events are generated inside dirent methods. Events are - generated in the dirent methods rather than `fs.Inode` methods because some - events carry the name of the subject node, and node names are generally - unavailable in an `fs.Inode`. -- Dirents do not directly contain state for any watches. Instead, they forward - notifications to the underlying `fs.Inode`. - -### [`fs.Inode`][inode] - -- Interacts with inotify through `fs.Watch`es. -- Inodes contain a map of all active `fs.Watch`es on them. -- An `fs.Inotify` instance can have at most one `fs.Watch` per inode. - `fs.Watch`es on an inode are indexed by their `owner`'s `id`. -- All inotify logic is encapsulated in the [`Watches`][inode_watches] struct - in an inode. Logically, `Watches` is the set of inotify watches on the - inode. - -## Reference Model - -The sentry inotify implementation has a complex reference model. An inotify -watch observes a single inode. For efficient lookup, the state for a watch is -stored directly on the target inode. This state needs to be persistent for the -lifetime of watch. Unlike usual filesystem metadata, the watch state has no -"on-disk" representation, so they cannot be reconstructed by the filesystem if -the inode is flushed from memory. This effectively means we need to keep any -inodes with actives watches pinned to memory. - -We can't just hold an extra ref on the inode to pin it to memory because some -filesystems (such as gofer-based filesystems) don't have persistent inodes. In -such a filesystem, if we just pin the inode, nothing prevents the enclosing -dirent from being GCed. Once the dirent is GCed, the pinned inode is -unreachable -- these filesystems generate a new inode by re-reading the node -state on the next walk. Incidentally, hardlinks also don't work on these -filesystems for this reason. - -To prevent the above scenario, when a new watch is added on an inode, we *pin* -the dirent we used to reach the inode. Note that due to hardlinks, this dirent -may not be the only dirent pointing to the inode. Attempting to set an inotify -watch via multiple hardlinks to the same file results in the same watch being -returned for both links. However, for each new dirent we use to reach the same -inode, we add a new pin. We need a new pin for each new dirent used to reach the -inode because we have no guarantees about the deletion order of the different -links to the inode. - -## Lock Ordering - -There are 4 locks related to the inotify implementation: - -- `Inotify.mu`: the inotify instance lock. -- `Inotify.evMu`: the inotify event queue lock. -- `Watch.mu`: the watch lock, used to protect pins. -- `fs.Watches.mu`: the inode watch set mu, used to protect the collection of - watches on the inode. - -The correct lock ordering for inotify code is: - -`Inotify.mu` -> `fs.Watches.mu` -> `Watch.mu` -> `Inotify.evMu`. - -We need a distinct lock for the event queue because by the time a goroutine -attempts to queue a new event, it is already holding `fs.Watches.mu`. If we used -`Inotify.mu` to also protect the event queue, this would violate the above lock -ordering. - -[dirent]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/dirent.go -[event]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_event.go -[fd_table]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/kernel/fd_table.go -[inode]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode.go -[inode_watches]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode_inotify.go -[inotify]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify.go -[syscall_dir]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/syscalls/linux/ -[watch]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_watch.go diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD deleted file mode 100644 index 6b993928c..000000000 --- a/pkg/sentry/fs/gofer/BUILD +++ /dev/null @@ -1,65 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "gofer", - srcs = [ - "attr.go", - "cache_policy.go", - "context_file.go", - "device.go", - "file.go", - "file_state.go", - "fs.go", - "handles.go", - "inode.go", - "inode_state.go", - "path.go", - "session.go", - "session_state.go", - "socket.go", - "util.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/gofer", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/fd", - "//pkg/log", - "//pkg/metric", - "//pkg/p9", - "//pkg/refs", - "//pkg/secio", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fdpipe", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/host", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/safemem", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/unet", - "//pkg/waiter", - ], -) - -go_test( - name = "gofer_test", - size = "small", - srcs = ["gofer_test.go"], - embed = [":gofer"], - deps = [ - "//pkg/p9", - "//pkg/p9/p9test", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - ], -) diff --git a/pkg/sentry/fs/gofer/gofer_state_autogen.go b/pkg/sentry/fs/gofer/gofer_state_autogen.go new file mode 100755 index 000000000..e05895fab --- /dev/null +++ b/pkg/sentry/fs/gofer/gofer_state_autogen.go @@ -0,0 +1,113 @@ +// automatically generated by stateify. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *fileOperations) beforeSave() {} +func (x *fileOperations) save(m state.Map) { + x.beforeSave() + m.Save("inodeOperations", &x.inodeOperations) + m.Save("dirCursor", &x.dirCursor) + m.Save("flags", &x.flags) +} + +func (x *fileOperations) load(m state.Map) { + m.LoadWait("inodeOperations", &x.inodeOperations) + m.Load("dirCursor", &x.dirCursor) + m.LoadWait("flags", &x.flags) + m.AfterLoad(x.afterLoad) +} + +func (x *filesystem) beforeSave() {} +func (x *filesystem) save(m state.Map) { + x.beforeSave() +} + +func (x *filesystem) afterLoad() {} +func (x *filesystem) load(m state.Map) { +} + +func (x *inodeOperations) beforeSave() {} +func (x *inodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("fileState", &x.fileState) + m.Save("cachingInodeOps", &x.cachingInodeOps) +} + +func (x *inodeOperations) afterLoad() {} +func (x *inodeOperations) load(m state.Map) { + m.LoadWait("fileState", &x.fileState) + m.Load("cachingInodeOps", &x.cachingInodeOps) +} + +func (x *inodeFileState) save(m state.Map) { + x.beforeSave() + var loading struct{} = x.saveLoading() + m.SaveValue("loading", loading) + m.Save("s", &x.s) + m.Save("sattr", &x.sattr) + m.Save("savedUAttr", &x.savedUAttr) + m.Save("hostMappable", &x.hostMappable) +} + +func (x *inodeFileState) load(m state.Map) { + m.LoadWait("s", &x.s) + m.LoadWait("sattr", &x.sattr) + m.Load("savedUAttr", &x.savedUAttr) + m.Load("hostMappable", &x.hostMappable) + m.LoadValue("loading", new(struct{}), func(y interface{}) { x.loadLoading(y.(struct{})) }) + m.AfterLoad(x.afterLoad) +} + +func (x *endpointMaps) beforeSave() {} +func (x *endpointMaps) save(m state.Map) { + x.beforeSave() + m.Save("direntMap", &x.direntMap) + m.Save("pathMap", &x.pathMap) +} + +func (x *endpointMaps) afterLoad() {} +func (x *endpointMaps) load(m state.Map) { + m.Load("direntMap", &x.direntMap) + m.Load("pathMap", &x.pathMap) +} + +func (x *session) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("msize", &x.msize) + m.Save("version", &x.version) + m.Save("cachePolicy", &x.cachePolicy) + m.Save("aname", &x.aname) + m.Save("superBlockFlags", &x.superBlockFlags) + m.Save("connID", &x.connID) + m.Save("inodeMappings", &x.inodeMappings) + m.Save("mounter", &x.mounter) + m.Save("endpoints", &x.endpoints) +} + +func (x *session) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.LoadWait("msize", &x.msize) + m.LoadWait("version", &x.version) + m.LoadWait("cachePolicy", &x.cachePolicy) + m.LoadWait("aname", &x.aname) + m.LoadWait("superBlockFlags", &x.superBlockFlags) + m.LoadWait("connID", &x.connID) + m.LoadWait("inodeMappings", &x.inodeMappings) + m.LoadWait("mounter", &x.mounter) + m.LoadWait("endpoints", &x.endpoints) + m.AfterLoad(x.afterLoad) +} + +func init() { + state.Register("gofer.fileOperations", (*fileOperations)(nil), state.Fns{Save: (*fileOperations).save, Load: (*fileOperations).load}) + state.Register("gofer.filesystem", (*filesystem)(nil), state.Fns{Save: (*filesystem).save, Load: (*filesystem).load}) + state.Register("gofer.inodeOperations", (*inodeOperations)(nil), state.Fns{Save: (*inodeOperations).save, Load: (*inodeOperations).load}) + state.Register("gofer.inodeFileState", (*inodeFileState)(nil), state.Fns{Save: (*inodeFileState).save, Load: (*inodeFileState).load}) + state.Register("gofer.endpointMaps", (*endpointMaps)(nil), state.Fns{Save: (*endpointMaps).save, Load: (*endpointMaps).load}) + state.Register("gofer.session", (*session)(nil), state.Fns{Save: (*session).save, Load: (*session).load}) +} diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go deleted file mode 100644 index 7fc3c32ae..000000000 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gofer - -import ( - "fmt" - "syscall" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/p9/p9test" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// rootTest runs a test with a p9 mock and an fs.InodeOperations created from -// the attached root directory. The root file will be closed and client -// disconnected, but additional files must be closed manually. -func rootTest(t *testing.T, name string, cp cachePolicy, fn func(context.Context, *p9test.Harness, *p9test.Mock, *fs.Inode)) { - t.Run(name, func(t *testing.T) { - h, c := p9test.NewHarness(t) - defer h.Finish() - - // Create a new root. Note that we pass an empty, but non-nil - // map here. This allows tests to extend the root children - // dynamically. - root := h.NewDirectory(map[string]p9test.Generator{})(nil) - - // Return this as the root. - h.Attacher.EXPECT().Attach().Return(root, nil).Times(1) - - // ... and open via the client. - rootFile, err := c.Attach("/") - if err != nil { - t.Fatalf("unable to attach: %v", err) - } - defer rootFile.Close() - - // Wrap an a session. - s := &session{ - mounter: fs.RootOwner, - cachePolicy: cp, - client: c, - } - - // ... and an INode, with only the mode being explicitly valid for now. - ctx := contexttest.Context(t) - sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{ - file: rootFile, - }, root.QID, p9.AttrMaskAll(), root.Attr, false /* socket */) - m := fs.NewMountSource(ctx, s, &filesystem{}, fs.MountSourceFlags{}) - rootInode := fs.NewInode(ctx, rootInodeOperations, m, sattr) - - // Ensure that the cache is fully invalidated, so that any - // close actions actually take place before the full harness is - // torn down. - defer func() { - m.FlushDirentRefs() - - // Wait for all resources to be released, otherwise the - // operations may fail after we close the rootFile. - fs.AsyncBarrier() - }() - - // Execute the test. - fn(ctx, h, root, rootInode) - }) -} - -func TestLookup(t *testing.T) { - type lookupTest struct { - // Name of the test. - name string - - // Expected return value. - want error - } - - tests := []lookupTest{ - { - name: "mock Walk passes (function succeeds)", - want: nil, - }, - { - name: "mock Walk fails (function fails)", - want: syscall.ENOENT, - }, - } - - const file = "file" // The walked target file. - - for _, test := range tests { - rootTest(t, test.name, cacheNone, func(ctx context.Context, h *p9test.Harness, rootFile *p9test.Mock, rootInode *fs.Inode) { - // Setup the appropriate result. - rootFile.WalkCallback = func() error { - return test.want - } - if test.want == nil { - // Set the contents of the root. We expect a - // normal file generator for ppp above. This is - // overriden by setting WalkErr in the mock. - rootFile.AddChild(file, h.NewFile()) - } - - // Call function. - dirent, err := rootInode.Lookup(ctx, file) - - // Unwrap the InodeOperations. - var newInodeOperations fs.InodeOperations - if dirent != nil { - if dirent.IsNegative() { - err = syscall.ENOENT - } else { - newInodeOperations = dirent.Inode.InodeOperations - } - } - - // Check return values. - if err != test.want { - t.Errorf("Lookup got err %v, want %v", err, test.want) - } - if err == nil && newInodeOperations == nil { - t.Errorf("Lookup got non-nil err and non-nil node, wanted at least one non-nil") - } - }) - } -} - -func TestRevalidation(t *testing.T) { - type revalidationTest struct { - cachePolicy cachePolicy - - // Whether dirent should be reloaded before any modifications. - preModificationWantReload bool - - // Whether dirent should be reloaded after updating an unstable - // attribute on the remote fs. - postModificationWantReload bool - - // Whether dirent unstable attributes should be updated after - // updating an attribute on the remote fs. - postModificationWantUpdatedAttrs bool - - // Whether dirent should be reloaded after the remote has - // removed the file. - postRemovalWantReload bool - } - - tests := []revalidationTest{ - { - // Policy cacheNone causes Revalidate to always return - // true. - cachePolicy: cacheNone, - preModificationWantReload: true, - postModificationWantReload: true, - postModificationWantUpdatedAttrs: true, - postRemovalWantReload: true, - }, - { - // Policy cacheAll causes Revalidate to always return - // false. - cachePolicy: cacheAll, - preModificationWantReload: false, - postModificationWantReload: false, - postModificationWantUpdatedAttrs: false, - postRemovalWantReload: false, - }, - { - // Policy cacheAllWritethrough causes Revalidate to - // always return false. - cachePolicy: cacheAllWritethrough, - preModificationWantReload: false, - postModificationWantReload: false, - postModificationWantUpdatedAttrs: false, - postRemovalWantReload: false, - }, - { - // Policy cacheRemoteRevalidating causes Revalidate to - // return update cached unstable attrs, and returns - // true only when the remote inode itself has been - // removed or replaced. - cachePolicy: cacheRemoteRevalidating, - preModificationWantReload: false, - postModificationWantReload: false, - postModificationWantUpdatedAttrs: true, - postRemovalWantReload: true, - }, - } - - const file = "file" // The file walked below. - - for _, test := range tests { - name := fmt.Sprintf("cachepolicy=%s", test.cachePolicy) - rootTest(t, name, test.cachePolicy, func(ctx context.Context, h *p9test.Harness, rootFile *p9test.Mock, rootInode *fs.Inode) { - // Wrap in a dirent object. - rootDir := fs.NewDirent(ctx, rootInode, "root") - - // Create a mock file a child of the root. We save when - // this is generated, so that when the time changed, we - // can update the original entry. - var origMocks []*p9test.Mock - rootFile.AddChild(file, func(parent *p9test.Mock) *p9test.Mock { - // Regular a regular file that has a consistent - // path number. This might be used by - // validation so we don't change it. - m := h.NewMock(parent, 0, p9.Attr{ - Mode: p9.ModeRegular, - }) - origMocks = append(origMocks, m) - return m - }) - - // Do the walk. - dirent, err := rootDir.Walk(ctx, rootDir, file) - if err != nil { - t.Fatalf("Lookup failed: %v", err) - } - - // We must release the dirent, of the test will fail - // with a reference leak. This is tracked by p9test. - defer dirent.DecRef() - - // Walk again. Depending on the cache policy, we may - // get a new dirent. - newDirent, err := rootDir.Walk(ctx, rootDir, file) - if err != nil { - t.Fatalf("Lookup failed: %v", err) - } - if test.preModificationWantReload && dirent == newDirent { - t.Errorf("Lookup with cachePolicy=%s got old dirent %+v, wanted a new dirent", test.cachePolicy, dirent) - } - if !test.preModificationWantReload && dirent != newDirent { - t.Errorf("Lookup with cachePolicy=%s got new dirent %+v, wanted old dirent %+v", test.cachePolicy, newDirent, dirent) - } - newDirent.DecRef() // See above. - - // Modify the underlying mocked file's modification - // time for the next walk that occurs. - nowSeconds := time.Now().Unix() - rootFile.AddChild(file, func(parent *p9test.Mock) *p9test.Mock { - // Ensure that the path is the same as above, - // but we change only the modification time of - // the file. - return h.NewMock(parent, 0, p9.Attr{ - Mode: p9.ModeRegular, - MTimeSeconds: uint64(nowSeconds), - }) - }) - - // We also modify the original time, so that GetAttr - // behaves as expected for the caching case. - for _, m := range origMocks { - m.Attr.MTimeSeconds = uint64(nowSeconds) - } - - // Walk again. Depending on the cache policy, we may - // get a new dirent. - newDirent, err = rootDir.Walk(ctx, rootDir, file) - if err != nil { - t.Fatalf("Lookup failed: %v", err) - } - if test.postModificationWantReload && dirent == newDirent { - t.Errorf("Lookup with cachePolicy=%s got old dirent, wanted a new dirent", test.cachePolicy) - } - if !test.postModificationWantReload && dirent != newDirent { - t.Errorf("Lookup with cachePolicy=%s got new dirent, wanted old dirent", test.cachePolicy) - } - uattrs, err := newDirent.Inode.UnstableAttr(ctx) - if err != nil { - t.Fatalf("Error getting unstable attrs: %v", err) - } - gotModTimeSeconds := uattrs.ModificationTime.Seconds() - if test.postModificationWantUpdatedAttrs && gotModTimeSeconds != nowSeconds { - t.Fatalf("Lookup with cachePolicy=%s got new modification time %v, wanted %v", test.cachePolicy, gotModTimeSeconds, nowSeconds) - } - newDirent.DecRef() // See above. - - // Remove the file from the remote fs, subsequent walks - // should now fail to find anything. - rootFile.RemoveChild(file) - - // Walk again. Depending on the cache policy, we may - // get ENOENT. - newDirent, err = rootDir.Walk(ctx, rootDir, file) - if test.postRemovalWantReload && err == nil { - t.Errorf("Lookup with cachePolicy=%s got nil error, wanted ENOENT", test.cachePolicy) - } - if !test.postRemovalWantReload && (err != nil || dirent != newDirent) { - t.Errorf("Lookup with cachePolicy=%s got new dirent and error %v, wanted old dirent and nil error", test.cachePolicy, err) - } - if err == nil { - newDirent.DecRef() // See above. - } - }) - } -} diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD deleted file mode 100644 index b1080fb1a..000000000 --- a/pkg/sentry/fs/host/BUILD +++ /dev/null @@ -1,83 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "host", - srcs = [ - "control.go", - "descriptor.go", - "descriptor_state.go", - "device.go", - "file.go", - "fs.go", - "inode.go", - "inode_state.go", - "ioctl_unsafe.go", - "socket.go", - "socket_iovec.go", - "socket_state.go", - "socket_unsafe.go", - "tty.go", - "util.go", - "util_unsafe.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/host", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/log", - "//pkg/refs", - "//pkg/secio", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/safemem", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/unimpl", - "//pkg/sentry/uniqueid", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/unet", - "//pkg/waiter", - ], -) - -go_test( - name = "host_test", - size = "small", - srcs = [ - "descriptor_test.go", - "fs_test.go", - "inode_test.go", - "socket_test.go", - "wait_test.go", - ], - embed = [":host"], - deps = [ - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/tcpip", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/host/descriptor_test.go b/pkg/sentry/fs/host/descriptor_test.go deleted file mode 100644 index 4205981f5..000000000 --- a/pkg/sentry/fs/host/descriptor_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "io/ioutil" - "path/filepath" - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestDescriptorRelease(t *testing.T) { - for _, tc := range []struct { - name string - saveable bool - wouldBlock bool - }{ - {name: "all false"}, - {name: "saveable", saveable: true}, - {name: "wouldBlock", wouldBlock: true}, - } { - t.Run(tc.name, func(t *testing.T) { - dir, err := ioutil.TempDir("", "descriptor_test") - if err != nil { - t.Fatal("ioutil.TempDir() failed:", err) - } - - fd, err := syscall.Open(filepath.Join(dir, "file"), syscall.O_RDWR|syscall.O_CREAT, 0666) - if err != nil { - t.Fatal("failed to open temp file:", err) - } - - // FD ownership is transferred to the descritor. - queue := &waiter.Queue{} - d, err := newDescriptor(fd, false /* donated*/, tc.saveable, tc.wouldBlock, queue) - if err != nil { - syscall.Close(fd) - t.Fatalf("newDescriptor(%d, %t, false, %t, queue) failed, err: %v", fd, tc.saveable, tc.wouldBlock, err) - } - if tc.saveable { - if d.origFD < 0 { - t.Errorf("saveable descriptor must preserve origFD, desc: %+v", d) - } - } - if tc.wouldBlock { - if !fdnotifier.HasFD(int32(d.value)) { - t.Errorf("FD not registered with notifier, desc: %+v", d) - } - } - - oldVal := d.value - d.Release() - if d.value != -1 { - t.Errorf("d.value want: -1, got: %d", d.value) - } - if tc.wouldBlock { - if fdnotifier.HasFD(int32(oldVal)) { - t.Errorf("FD not unregistered with notifier, desc: %+v", d) - } - } - }) - } -} diff --git a/pkg/sentry/fs/host/fs_test.go b/pkg/sentry/fs/host/fs_test.go deleted file mode 100644 index c6852ee30..000000000 --- a/pkg/sentry/fs/host/fs_test.go +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "fmt" - "io/ioutil" - "os" - "path" - "reflect" - "sort" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// newTestMountNamespace creates a MountNamespace with a ramfs root. -// It returns the host folder created, which should be removed when done. -func newTestMountNamespace(t *testing.T) (*fs.MountNamespace, string, error) { - p, err := ioutil.TempDir("", "root") - if err != nil { - return nil, "", err - } - - fd, err := open(nil, p) - if err != nil { - os.RemoveAll(p) - return nil, "", err - } - ctx := contexttest.Context(t) - root, err := newInode(ctx, newMountSource(ctx, p, fs.RootOwner, &Filesystem{}, fs.MountSourceFlags{}, false), fd, false, false) - if err != nil { - os.RemoveAll(p) - return nil, "", err - } - mm, err := fs.NewMountNamespace(ctx, root) - if err != nil { - os.RemoveAll(p) - return nil, "", err - } - return mm, p, nil -} - -// createTestDirs populates the root with some test files and directories. -// /a/a1.txt -// /a/a2.txt -// /b/b1.txt -// /b/c/c1.txt -// /symlinks/normal.txt -// /symlinks/to_normal.txt -> /symlinks/normal.txt -// /symlinks/recursive -> /symlinks -func createTestDirs(ctx context.Context, t *testing.T, m *fs.MountNamespace) error { - r := m.Root() - defer r.DecRef() - - if err := r.CreateDirectory(ctx, r, "a", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - a, err := r.Walk(ctx, r, "a") - if err != nil { - return err - } - defer a.DecRef() - - a1, err := a.Create(ctx, r, "a1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - a1.DecRef() - - a2, err := a.Create(ctx, r, "a2.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - a2.DecRef() - - if err := r.CreateDirectory(ctx, r, "b", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - b, err := r.Walk(ctx, r, "b") - if err != nil { - return err - } - defer b.DecRef() - - b1, err := b.Create(ctx, r, "b1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - b1.DecRef() - - if err := b.CreateDirectory(ctx, r, "c", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - c, err := b.Walk(ctx, r, "c") - if err != nil { - return err - } - defer c.DecRef() - - c1, err := c.Create(ctx, r, "c1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - c1.DecRef() - - if err := r.CreateDirectory(ctx, r, "symlinks", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - symlinks, err := r.Walk(ctx, r, "symlinks") - if err != nil { - return err - } - defer symlinks.DecRef() - - normal, err := symlinks.Create(ctx, r, "normal.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - normal.DecRef() - - if err := symlinks.CreateLink(ctx, r, "/symlinks/normal.txt", "to_normal.txt"); err != nil { - return err - } - - return symlinks.CreateLink(ctx, r, "/symlinks", "recursive") -} - -// allPaths returns a slice of all paths of entries visible in the rootfs. -func allPaths(ctx context.Context, t *testing.T, m *fs.MountNamespace, base string) ([]string, error) { - var paths []string - root := m.Root() - defer root.DecRef() - - maxTraversals := uint(1) - d, err := m.FindLink(ctx, root, nil, base, &maxTraversals) - if err != nil { - t.Logf("FindLink failed for %q", base) - return paths, err - } - defer d.DecRef() - - if fs.IsDir(d.Inode.StableAttr) { - dir, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) - if err != nil { - return nil, fmt.Errorf("failed to open directory %q: %v", base, err) - } - iter, ok := dir.FileOperations.(fs.DirIterator) - if !ok { - return nil, fmt.Errorf("cannot directly iterate on host directory %q", base) - } - dirCtx := &fs.DirCtx{ - Serializer: noopDentrySerializer{}, - } - if _, err := fs.DirentReaddir(ctx, d, iter, root, dirCtx, 0); err != nil { - return nil, err - } - for name := range dirCtx.DentAttrs() { - if name == "." || name == ".." { - continue - } - - fullName := path.Join(base, name) - paths = append(paths, fullName) - - // Recurse. - subpaths, err := allPaths(ctx, t, m, fullName) - if err != nil { - return paths, err - } - paths = append(paths, subpaths...) - } - } - - return paths, nil -} - -type noopDentrySerializer struct{} - -func (noopDentrySerializer) CopyOut(string, fs.DentAttr) error { - return nil -} -func (noopDentrySerializer) Written() int { - return 4096 -} - -// pathsEqual returns true if the two string slices contain the same entries. -func pathsEqual(got, want []string) bool { - sort.Strings(got) - sort.Strings(want) - - if len(got) != len(want) { - return false - } - - for i := range got { - if got[i] != want[i] { - return false - } - } - - return true -} - -func TestWhitelist(t *testing.T) { - for _, test := range []struct { - // description of the test. - desc string - // paths are the paths to whitelist - paths []string - // want are all of the directory entries that should be - // visible (nothing beyond this set should be visible). - want []string - }{ - { - desc: "root", - paths: []string{"/"}, - want: []string{"/a", "/a/a1.txt", "/a/a2.txt", "/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt", "/symlinks", "/symlinks/normal.txt", "/symlinks/to_normal.txt", "/symlinks/recursive"}, - }, - { - desc: "top-level directories", - paths: []string{"/a", "/b"}, - want: []string{"/a", "/a/a1.txt", "/a/a2.txt", "/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "nested directories (1/2)", - paths: []string{"/b", "/b/c"}, - want: []string{"/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "nested directories (2/2)", - paths: []string{"/b/c", "/b"}, - want: []string{"/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "single file", - paths: []string{"/b/c/c1.txt"}, - want: []string{"/b", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "single file and directory", - paths: []string{"/a/a1.txt", "/b/c"}, - want: []string{"/a", "/a/a1.txt", "/b", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "symlink", - paths: []string{"/symlinks/to_normal.txt"}, - want: []string{"/symlinks", "/symlinks/normal.txt", "/symlinks/to_normal.txt"}, - }, - { - desc: "recursive symlink", - paths: []string{"/symlinks/recursive/normal.txt"}, - want: []string{"/symlinks", "/symlinks/normal.txt", "/symlinks/recursive"}, - }, - } { - t.Run(test.desc, func(t *testing.T) { - m, p, err := newTestMountNamespace(t) - if err != nil { - t.Errorf("Failed to create MountNamespace: %v", err) - } - defer os.RemoveAll(p) - - ctx := withRoot(contexttest.RootContext(t), m.Root()) - if err := createTestDirs(ctx, t, m); err != nil { - t.Errorf("Failed to create test dirs: %v", err) - } - - if err := installWhitelist(ctx, m, test.paths); err != nil { - t.Errorf("installWhitelist(%v) err got %v want nil", test.paths, err) - } - - got, err := allPaths(ctx, t, m, "/") - if err != nil { - t.Fatalf("Failed to lookup paths (whitelisted: %v): %v", test.paths, err) - } - - if !pathsEqual(got, test.want) { - t.Errorf("For paths %v got %v want %v", test.paths, got, test.want) - } - }) - } -} - -func TestRootPath(t *testing.T) { - // Create a temp dir, which will be the root of our mounted fs. - rootPath, err := ioutil.TempDir(os.TempDir(), "root") - if err != nil { - t.Fatalf("TempDir failed: %v", err) - } - defer os.RemoveAll(rootPath) - - // Create two files inside the new root, one which will be whitelisted - // and one not. - whitelisted, err := ioutil.TempFile(rootPath, "white") - if err != nil { - t.Fatalf("TempFile failed: %v", err) - } - if _, err := ioutil.TempFile(rootPath, "black"); err != nil { - t.Fatalf("TempFile failed: %v", err) - } - - // Create a mount with a root path and single whitelisted file. - hostFS := &Filesystem{} - ctx := contexttest.Context(t) - data := fmt.Sprintf("%s=%s,%s=%s", rootPathKey, rootPath, whitelistKey, whitelisted.Name()) - inode, err := hostFS.Mount(ctx, "", fs.MountSourceFlags{}, data, nil) - if err != nil { - t.Fatalf("Mount failed: %v", err) - } - mm, err := fs.NewMountNamespace(ctx, inode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - if err := hostFS.InstallWhitelist(ctx, mm); err != nil { - t.Fatalf("InstallWhitelist failed: %v", err) - } - - // Get the contents of the root directory. - rootDir := mm.Root() - rctx := withRoot(ctx, rootDir) - f, err := rootDir.Inode.GetFile(rctx, rootDir, fs.FileFlags{}) - if err != nil { - t.Fatalf("GetFile failed: %v", err) - } - c := &fs.CollectEntriesSerializer{} - if err := f.Readdir(rctx, c); err != nil { - t.Fatalf("Readdir failed: %v", err) - } - - // We should have only our whitelisted file, plus the dots. - want := []string{path.Base(whitelisted.Name()), ".", ".."} - got := c.Order - sort.Strings(want) - sort.Strings(got) - if !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got %v, wanted %v", got, want) - } -} - -type rootContext struct { - context.Context - root *fs.Dirent -} - -// withRoot returns a copy of ctx with the given root. -func withRoot(ctx context.Context, root *fs.Dirent) context.Context { - return &rootContext{ - Context: ctx, - root: root, - } -} - -// Value implements Context.Value. -func (rc rootContext) Value(key interface{}) interface{} { - switch key { - case fs.CtxRoot: - rc.root.IncRef() - return rc.root - default: - return rc.Context.Value(key) - } -} diff --git a/pkg/sentry/fs/host/host_state_autogen.go b/pkg/sentry/fs/host/host_state_autogen.go new file mode 100755 index 000000000..f0e1c4b88 --- /dev/null +++ b/pkg/sentry/fs/host/host_state_autogen.go @@ -0,0 +1,138 @@ +// automatically generated by stateify. + +package host + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *descriptor) save(m state.Map) { + x.beforeSave() + m.Save("donated", &x.donated) + m.Save("origFD", &x.origFD) + m.Save("wouldBlock", &x.wouldBlock) +} + +func (x *descriptor) load(m state.Map) { + m.Load("donated", &x.donated) + m.Load("origFD", &x.origFD) + m.Load("wouldBlock", &x.wouldBlock) + m.AfterLoad(x.afterLoad) +} + +func (x *fileOperations) beforeSave() {} +func (x *fileOperations) save(m state.Map) { + x.beforeSave() + m.Save("iops", &x.iops) + m.Save("dirCursor", &x.dirCursor) +} + +func (x *fileOperations) afterLoad() {} +func (x *fileOperations) load(m state.Map) { + m.LoadWait("iops", &x.iops) + m.Load("dirCursor", &x.dirCursor) +} + +func (x *Filesystem) beforeSave() {} +func (x *Filesystem) save(m state.Map) { + x.beforeSave() + m.Save("paths", &x.paths) +} + +func (x *Filesystem) afterLoad() {} +func (x *Filesystem) load(m state.Map) { + m.Load("paths", &x.paths) +} + +func (x *superOperations) beforeSave() {} +func (x *superOperations) save(m state.Map) { + x.beforeSave() + m.Save("SimpleMountSourceOperations", &x.SimpleMountSourceOperations) + m.Save("root", &x.root) + m.Save("inodeMappings", &x.inodeMappings) + m.Save("mounter", &x.mounter) + m.Save("dontTranslateOwnership", &x.dontTranslateOwnership) +} + +func (x *superOperations) afterLoad() {} +func (x *superOperations) load(m state.Map) { + m.Load("SimpleMountSourceOperations", &x.SimpleMountSourceOperations) + m.Load("root", &x.root) + m.Load("inodeMappings", &x.inodeMappings) + m.Load("mounter", &x.mounter) + m.Load("dontTranslateOwnership", &x.dontTranslateOwnership) +} + +func (x *inodeOperations) beforeSave() {} +func (x *inodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("fileState", &x.fileState) + m.Save("cachingInodeOps", &x.cachingInodeOps) +} + +func (x *inodeOperations) afterLoad() {} +func (x *inodeOperations) load(m state.Map) { + m.LoadWait("fileState", &x.fileState) + m.Load("cachingInodeOps", &x.cachingInodeOps) +} + +func (x *inodeFileState) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.queue) { m.Failf("queue is %v, expected zero", x.queue) } + m.Save("mops", &x.mops) + m.Save("descriptor", &x.descriptor) + m.Save("sattr", &x.sattr) + m.Save("savedUAttr", &x.savedUAttr) +} + +func (x *inodeFileState) load(m state.Map) { + m.LoadWait("mops", &x.mops) + m.LoadWait("descriptor", &x.descriptor) + m.LoadWait("sattr", &x.sattr) + m.Load("savedUAttr", &x.savedUAttr) + m.AfterLoad(x.afterLoad) +} + +func (x *ConnectedEndpoint) save(m state.Map) { + x.beforeSave() + m.Save("ref", &x.ref) + m.Save("queue", &x.queue) + m.Save("path", &x.path) + m.Save("srfd", &x.srfd) + m.Save("stype", &x.stype) +} + +func (x *ConnectedEndpoint) load(m state.Map) { + m.Load("ref", &x.ref) + m.Load("queue", &x.queue) + m.Load("path", &x.path) + m.LoadWait("srfd", &x.srfd) + m.Load("stype", &x.stype) + m.AfterLoad(x.afterLoad) +} + +func (x *TTYFileOperations) beforeSave() {} +func (x *TTYFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("fileOperations", &x.fileOperations) + m.Save("session", &x.session) + m.Save("fgProcessGroup", &x.fgProcessGroup) +} + +func (x *TTYFileOperations) afterLoad() {} +func (x *TTYFileOperations) load(m state.Map) { + m.Load("fileOperations", &x.fileOperations) + m.Load("session", &x.session) + m.Load("fgProcessGroup", &x.fgProcessGroup) +} + +func init() { + state.Register("host.descriptor", (*descriptor)(nil), state.Fns{Save: (*descriptor).save, Load: (*descriptor).load}) + state.Register("host.fileOperations", (*fileOperations)(nil), state.Fns{Save: (*fileOperations).save, Load: (*fileOperations).load}) + state.Register("host.Filesystem", (*Filesystem)(nil), state.Fns{Save: (*Filesystem).save, Load: (*Filesystem).load}) + state.Register("host.superOperations", (*superOperations)(nil), state.Fns{Save: (*superOperations).save, Load: (*superOperations).load}) + state.Register("host.inodeOperations", (*inodeOperations)(nil), state.Fns{Save: (*inodeOperations).save, Load: (*inodeOperations).load}) + state.Register("host.inodeFileState", (*inodeFileState)(nil), state.Fns{Save: (*inodeFileState).save, Load: (*inodeFileState).load}) + state.Register("host.ConnectedEndpoint", (*ConnectedEndpoint)(nil), state.Fns{Save: (*ConnectedEndpoint).save, Load: (*ConnectedEndpoint).load}) + state.Register("host.TTYFileOperations", (*TTYFileOperations)(nil), state.Fns{Save: (*TTYFileOperations).save, Load: (*TTYFileOperations).load}) +} diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go deleted file mode 100644 index 2d959f10d..000000000 --- a/pkg/sentry/fs/host/inode_test.go +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "io/ioutil" - "os" - "path" - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// TestMultipleReaddir verifies that multiple Readdir calls return the same -// thing if they use different dir contexts. -func TestMultipleReaddir(t *testing.T) { - p, err := ioutil.TempDir("", "readdir") - if err != nil { - t.Fatalf("Failed to create test dir: %v", err) - } - defer os.RemoveAll(p) - - f, err := os.Create(path.Join(p, "a.txt")) - if err != nil { - t.Fatalf("Failed to create a.txt: %v", err) - } - f.Close() - - f, err = os.Create(path.Join(p, "b.txt")) - if err != nil { - t.Fatalf("Failed to create b.txt: %v", err) - } - f.Close() - - fd, err := open(nil, p) - if err != nil { - t.Fatalf("Failed to open %q: %v", p, err) - } - ctx := contexttest.Context(t) - n, err := newInode(ctx, newMountSource(ctx, p, fs.RootOwner, &Filesystem{}, fs.MountSourceFlags{}, false), fd, false, false) - if err != nil { - t.Fatalf("Failed to create inode: %v", err) - } - - dirent := fs.NewDirent(ctx, n, "readdir") - openFile, err := n.GetFile(ctx, dirent, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("Failed to get file: %v", err) - } - defer openFile.DecRef() - - c1 := &fs.DirCtx{DirCursor: new(string)} - if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c1, 0); err != nil { - t.Fatalf("First Readdir failed: %v", err) - } - - c2 := &fs.DirCtx{DirCursor: new(string)} - if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c2, 0); err != nil { - t.Errorf("Second Readdir failed: %v", err) - } - - if _, ok := c1.DentAttrs()["a.txt"]; !ok { - t.Errorf("want a.txt in first Readdir, got %v", c1.DentAttrs()) - } - if _, ok := c1.DentAttrs()["b.txt"]; !ok { - t.Errorf("want b.txt in first Readdir, got %v", c1.DentAttrs()) - } - - if _, ok := c2.DentAttrs()["a.txt"]; !ok { - t.Errorf("want a.txt in second Readdir, got %v", c2.DentAttrs()) - } - if _, ok := c2.DentAttrs()["b.txt"]; !ok { - t.Errorf("want b.txt in second Readdir, got %v", c2.DentAttrs()) - } -} - -// TestCloseFD verifies fds will be closed. -func TestCloseFD(t *testing.T) { - var p [2]int - if err := syscall.Pipe(p[0:]); err != nil { - t.Fatalf("Failed to create pipe %v", err) - } - defer syscall.Close(p[0]) - defer syscall.Close(p[1]) - - // Use the write-end because we will detect if it's closed on the read end. - ctx := contexttest.Context(t) - file, err := NewFile(ctx, p[1], fs.RootOwner) - if err != nil { - t.Fatalf("Failed to create File: %v", err) - } - file.DecRef() - - s := make([]byte, 10) - if c, err := syscall.Read(p[0], s); c != 0 || err != nil { - t.Errorf("want 0, nil (EOF) from read end, got %v, %v", c, err) - } -} diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go deleted file mode 100644 index 68b38fd1c..000000000 --- a/pkg/sentry/fs/host/socket_test.go +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "reflect" - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/waiter" -) - -var ( - // Make sure that ConnectedEndpoint implements transport.ConnectedEndpoint. - _ = transport.ConnectedEndpoint(new(ConnectedEndpoint)) - - // Make sure that ConnectedEndpoint implements transport.Receiver. - _ = transport.Receiver(new(ConnectedEndpoint)) -) - -func getFl(fd int) (uint32, error) { - fl, _, err := syscall.RawSyscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_GETFL, 0) - if err == 0 { - return uint32(fl), nil - } - return 0, err -} - -func TestSocketIsBlocking(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - - fl, err := getFl(pair[0]) - if err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[0], err) - } - if fl&syscall.O_NONBLOCK == syscall.O_NONBLOCK { - t.Fatalf("Expected socket %v to be blocking", pair[0]) - } - if fl, err = getFl(pair[1]); err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[1], err) - } - if fl&syscall.O_NONBLOCK == syscall.O_NONBLOCK { - t.Fatalf("Expected socket %v to be blocking", pair[1]) - } - sock, err := newSocket(contexttest.Context(t), pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) failed => %v", pair[0], err) - } - defer sock.DecRef() - // Test that the socket now is non-blocking. - if fl, err = getFl(pair[0]); err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[0], err) - } - if fl&syscall.O_NONBLOCK != syscall.O_NONBLOCK { - t.Errorf("Expected socket %v to have become non-blocking", pair[0]) - } - if fl, err = getFl(pair[1]); err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[1], err) - } - if fl&syscall.O_NONBLOCK == syscall.O_NONBLOCK { - t.Errorf("Did not expect socket %v to become non-blocking", pair[1]) - } -} - -func TestSocketWritev(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - socket, err := newSocket(contexttest.Context(t), pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer socket.DecRef() - buf := []byte("hello world\n") - n, err := socket.Writev(contexttest.Context(t), usermem.BytesIOSequence(buf)) - if err != nil { - t.Fatalf("socket writev failed: %v", err) - } - - if n != int64(len(buf)) { - t.Fatalf("socket writev wrote incorrect bytes: %d", n) - } -} - -func TestSocketWritevLen0(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - socket, err := newSocket(contexttest.Context(t), pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer socket.DecRef() - n, err := socket.Writev(contexttest.Context(t), usermem.BytesIOSequence(nil)) - if err != nil { - t.Fatalf("socket writev failed: %v", err) - } - - if n != 0 { - t.Fatalf("socket writev wrote incorrect bytes: %d", n) - } -} - -func TestSocketSendMsgLen0(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - sfile, err := newSocket(contexttest.Context(t), pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer sfile.DecRef() - - s := sfile.FileOperations.(socket.Socket) - n, terr := s.SendMsg(nil, usermem.BytesIOSequence(nil), []byte{}, 0, false, ktime.Time{}, socket.ControlMessages{}) - if n != 0 { - t.Fatalf("socket sendmsg() failed: %v wrote: %d", terr, n) - } - - if terr != nil { - t.Fatalf("socket sendmsg() failed: %v", terr) - } -} - -func TestListen(t *testing.T) { - pair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) => %v", err) - } - sfile1, err := newSocket(contexttest.Context(t), pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer sfile1.DecRef() - socket1 := sfile1.FileOperations.(socket.Socket) - - sfile2, err := newSocket(contexttest.Context(t), pair[1], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[1], err) - } - defer sfile2.DecRef() - socket2 := sfile2.FileOperations.(socket.Socket) - - // Socketpairs can not be listened to. - if err := socket1.Listen(nil, 64); err != syserr.ErrInvalidEndpointState { - t.Fatalf("socket1.Listen(nil, 64) => %v, want syserr.ErrInvalidEndpointState", err) - } - if err := socket2.Listen(nil, 64); err != syserr.ErrInvalidEndpointState { - t.Fatalf("socket2.Listen(nil, 64) => %v, want syserr.ErrInvalidEndpointState", err) - } - - // Create a Unix socket, do not bind it. - sock, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) => %v", err) - } - sfile3, err := newSocket(contexttest.Context(t), sock, false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", sock, err) - } - defer sfile3.DecRef() - socket3 := sfile3.FileOperations.(socket.Socket) - - // This socket is not bound so we can't listen on it. - if err := socket3.Listen(nil, 64); err != syserr.ErrInvalidEndpointState { - t.Fatalf("socket3.Listen(nil, 64) => %v, want syserr.ErrInvalidEndpointState", err) - } -} - -func TestPasscred(t *testing.T) { - e := ConnectedEndpoint{} - if got, want := e.Passcred(), false; got != want { - t.Errorf("Got %#v.Passcred() = %t, want = %t", e, got, want) - } -} - -func TestGetLocalAddress(t *testing.T) { - e := ConnectedEndpoint{path: "foo"} - want := tcpip.FullAddress{Addr: tcpip.Address("foo")} - if got, err := e.GetLocalAddress(); err != nil || got != want { - t.Errorf("Got %#v.GetLocalAddress() = %#v, %v, want = %#v, %v", e, got, err, want, nil) - } -} - -func TestQueuedSize(t *testing.T) { - e := ConnectedEndpoint{} - tests := []struct { - name string - f func() int64 - }{ - {"SendQueuedSize", e.SendQueuedSize}, - {"RecvQueuedSize", e.RecvQueuedSize}, - } - - for _, test := range tests { - if got, want := test.f(), int64(-1); got != want { - t.Errorf("Got %#v.%s() = %d, want = %d", e, test.name, got, want) - } - } -} - -func TestRelease(t *testing.T) { - f, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c := &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f)} - want := &ConnectedEndpoint{queue: c.queue} - want.ref.DecRef() - fdnotifier.AddFD(int32(c.file.FD()), nil) - c.Release() - if !reflect.DeepEqual(c, want) { - t.Errorf("got = %#v, want = %#v", c, want) - } -} diff --git a/pkg/sentry/fs/host/wait_test.go b/pkg/sentry/fs/host/wait_test.go deleted file mode 100644 index 88d24d693..000000000 --- a/pkg/sentry/fs/host/wait_test.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "syscall" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestWait(t *testing.T) { - var fds [2]int - err := syscall.Pipe(fds[:]) - if err != nil { - t.Fatalf("Unable to create pipe: %v", err) - } - - defer syscall.Close(fds[1]) - - ctx := contexttest.Context(t) - file, err := NewFile(ctx, fds[0], fs.RootOwner) - if err != nil { - syscall.Close(fds[0]) - t.Fatalf("NewFile failed: %v", err) - } - - defer file.DecRef() - - r := file.Readiness(waiter.EventIn) - if r != 0 { - t.Fatalf("File is ready for read when it shouldn't be.") - } - - e, ch := waiter.NewChannelEntry(nil) - file.EventRegister(&e, waiter.EventIn) - defer file.EventUnregister(&e) - - // Check that there are no notifications yet. - if len(ch) != 0 { - t.Fatalf("Channel is non-empty") - } - - // Write to the pipe, so it should be writable now. - syscall.Write(fds[1], []byte{1}) - - // Check that we get a notification. We need to yield the current thread - // so that the fdnotifier can deliver notifications, so we use a - // 1-second timeout instead of just checking the length of the channel. - select { - case <-ch: - case <-time.After(1 * time.Second): - t.Fatalf("Channel not notified") - } -} diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go deleted file mode 100644 index 8935aad65..000000000 --- a/pkg/sentry/fs/inode_overlay_test.go +++ /dev/null @@ -1,470 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/syserror" -) - -func TestLookup(t *testing.T) { - ctx := contexttest.Context(t) - for _, test := range []struct { - // Test description. - desc string - - // Lookup parameters. - dir *fs.Inode - name string - - // Want from lookup. - found bool - hasUpper bool - hasLower bool - }{ - { - desc: "no upper, lower has name", - dir: fs.NewTestOverlayDir(ctx, - nil, /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: false, - hasLower: true, - }, - { - desc: "no lower, upper has name", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* upper */ - nil, /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: false, - }, - { - desc: "upper and lower, only lower has name", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "b", - dir: false, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: false, - hasLower: true, - }, - { - desc: "upper and lower, only upper has name", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "b", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: false, - }, - { - desc: "upper and lower, both have file", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: false, - }, - { - desc: "upper and lower, both have directory", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: true, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: true, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: true, - }, - { - desc: "upper and lower, upper negative masks lower file", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, nil, []string{"a"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: false, - hasUpper: false, - hasLower: false, - }, - { - desc: "upper and lower, upper negative does not mask lower file", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, nil, []string{"b"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: false, - hasLower: true, - }, - } { - t.Run(test.desc, func(t *testing.T) { - dirent, err := test.dir.Lookup(ctx, test.name) - if test.found && (err == syserror.ENOENT || dirent.IsNegative()) { - t.Fatalf("lookup %q expected to find positive dirent, got dirent %v err %v", test.name, dirent, err) - } - if !test.found { - if err != syserror.ENOENT && !dirent.IsNegative() { - t.Errorf("lookup %q expected to return ENOENT or negative dirent, got dirent %v err %v", test.name, dirent, err) - } - // Nothing more to check. - return - } - if hasUpper := dirent.Inode.TestHasUpperFS(); hasUpper != test.hasUpper { - t.Fatalf("lookup got upper filesystem %v, want %v", hasUpper, test.hasUpper) - } - if hasLower := dirent.Inode.TestHasLowerFS(); hasLower != test.hasLower { - t.Errorf("lookup got lower filesystem %v, want %v", hasLower, test.hasLower) - } - }) - } -} - -func TestLookupRevalidation(t *testing.T) { - // File name used in the tests. - fileName := "foofile" - ctx := contexttest.Context(t) - for _, tc := range []struct { - // Test description. - desc string - - // Upper and lower fs for the overlay. - upper *fs.Inode - lower *fs.Inode - - // Whether the upper requires revalidation. - revalidate bool - - // Whether we should get the same dirent on second lookup. - wantSame bool - }{ - { - desc: "file from upper with no revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, nil, nil), - revalidate: false, - wantSame: true, - }, - { - desc: "file from upper with revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, nil, nil), - revalidate: true, - wantSame: false, - }, - { - desc: "file from lower with no revalidation", - upper: newTestRamfsDir(ctx, nil, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: false, - wantSame: true, - }, - { - desc: "file from lower with revalidation", - upper: newTestRamfsDir(ctx, nil, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: true, - // The file does not exist in the upper, so we do not - // need to revalidate it. - wantSame: true, - }, - { - desc: "file from upper and lower with no revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: false, - wantSame: true, - }, - { - desc: "file from upper and lower with revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: true, - wantSame: false, - }, - } { - t.Run(tc.desc, func(t *testing.T) { - root := fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root") - ctx = &rootContext{ - Context: ctx, - root: root, - } - overlay := fs.NewDirent(ctx, fs.NewTestOverlayDir(ctx, tc.upper, tc.lower, tc.revalidate), "overlay") - // Lookup the file twice through the overlay. - first, err := overlay.Walk(ctx, root, fileName) - if err != nil { - t.Fatalf("overlay.Walk(%q) failed: %v", fileName, err) - } - second, err := overlay.Walk(ctx, root, fileName) - if err != nil { - t.Fatalf("overlay.Walk(%q) failed: %v", fileName, err) - } - - if tc.wantSame && first != second { - t.Errorf("dirent lookup got different dirents, wanted same\nfirst=%+v\nsecond=%+v", first, second) - } else if !tc.wantSame && first == second { - t.Errorf("dirent lookup got the same dirent, wanted different: %+v", first) - } - }) - } -} - -func TestCacheFlush(t *testing.T) { - ctx := contexttest.Context(t) - - // Upper and lower each have a file. - upperFileName := "file-from-upper" - lowerFileName := "file-from-lower" - upper := newTestRamfsDir(ctx, []dirContent{{name: upperFileName}}, nil) - lower := newTestRamfsDir(ctx, []dirContent{{name: lowerFileName}}, nil) - - overlay := fs.NewTestOverlayDir(ctx, upper, lower, true /* revalidate */) - - mns, err := fs.NewMountNamespace(ctx, overlay) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - root := mns.Root() - defer root.DecRef() - - ctx = &rootContext{ - Context: ctx, - root: root, - } - - for _, fileName := range []string{upperFileName, lowerFileName} { - // Walk to the file. - maxTraversals := uint(0) - dirent, err := mns.FindInode(ctx, root, nil, fileName, &maxTraversals) - if err != nil { - t.Fatalf("FindInode(%q) failed: %v", fileName, err) - } - - // Get a file from the dirent. - file, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("GetFile() failed: %v", err) - } - - // The dirent should have 3 refs, one from us, one from the - // file, and one from the dirent cache. - // dirent cache. - if got, want := dirent.ReadRefs(), 3; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - - // Drop the file reference. - file.DecRef() - - // Dirent should have 2 refs left. - if got, want := dirent.ReadRefs(), 2; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - - // Flush the dirent cache. - mns.FlushMountSourceRefs() - - // Dirent should have 1 ref left from the dirent cache. - if got, want := dirent.ReadRefs(), 1; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - - // Drop our ref. - dirent.DecRef() - - // We should be back to zero refs. - if got, want := dirent.ReadRefs(), 0; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - } - -} - -type dir struct { - fs.InodeOperations - - // List of negative child names. - negative []string - - // ReaddirCalled records whether Readdir was called on a file - // corresponding to this inode. - ReaddirCalled bool -} - -// Getxattr implements InodeOperations.Getxattr. -func (d *dir) Getxattr(inode *fs.Inode, name string) (string, error) { - for _, n := range d.negative { - if name == fs.XattrOverlayWhiteout(n) { - return "y", nil - } - } - return "", syserror.ENOATTR -} - -// GetFile implements InodeOperations.GetFile. -func (d *dir) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - file, err := d.InodeOperations.GetFile(ctx, dirent, flags) - if err != nil { - return nil, err - } - defer file.DecRef() - // Wrap the file's FileOperations in a dirFile. - fops := &dirFile{ - FileOperations: file.FileOperations, - inode: d, - } - return fs.NewFile(ctx, dirent, flags, fops), nil -} - -type dirContent struct { - name string - dir bool -} - -type dirFile struct { - fs.FileOperations - inode *dir -} - -type inode struct { - fsutil.InodeGenericChecker `state:"nosave"` - fsutil.InodeNoExtendedAttributes `state:"nosave"` - fsutil.InodeNoopRelease `state:"nosave"` - fsutil.InodeNoopWriteOut `state:"nosave"` - fsutil.InodeNotAllocatable `state:"nosave"` - fsutil.InodeNotDirectory `state:"nosave"` - fsutil.InodeNotMappable `state:"nosave"` - fsutil.InodeNotSocket `state:"nosave"` - fsutil.InodeNotSymlink `state:"nosave"` - fsutil.InodeNotTruncatable `state:"nosave"` - fsutil.InodeNotVirtual `state:"nosave"` - - fsutil.InodeSimpleAttributes - fsutil.InodeStaticFileGetter -} - -// Readdir implements fs.FileOperations.Readdir. It sets the ReaddirCalled -// field on the inode. -func (f *dirFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySerializer) (int64, error) { - f.inode.ReaddirCalled = true - return f.FileOperations.Readdir(ctx, file, ser) -} - -func newTestRamfsInode(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - inode := fs.NewInode(ctx, &inode{ - InodeStaticFileGetter: fsutil.InodeStaticFileGetter{ - Contents: []byte("foobar"), - }, - }, msrc, fs.StableAttr{Type: fs.RegularFile}) - return inode -} - -func newTestRamfsDir(ctx context.Context, contains []dirContent, negative []string) *fs.Inode { - msrc := fs.NewPseudoMountSource(ctx) - contents := make(map[string]*fs.Inode) - for _, c := range contains { - if c.dir { - contents[c.name] = newTestRamfsDir(ctx, nil, nil) - } else { - contents[c.name] = newTestRamfsInode(ctx, msrc) - } - } - dops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermissions{ - User: fs.PermMask{Read: true, Execute: true}, - }) - return fs.NewInode(ctx, &dir{ - InodeOperations: dops, - negative: negative, - }, msrc, fs.StableAttr{Type: fs.Directory}) -} diff --git a/pkg/sentry/fs/lock/BUILD b/pkg/sentry/fs/lock/BUILD deleted file mode 100644 index 08d7c0c57..000000000 --- a/pkg/sentry/fs/lock/BUILD +++ /dev/null @@ -1,58 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "lock_range", - out = "lock_range.go", - package = "lock", - prefix = "Lock", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - -go_template_instance( - name = "lock_set", - out = "lock_set.go", - consts = { - "minDegree": "3", - }, - package = "lock", - prefix = "Lock", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "LockRange", - "Value": "Lock", - "Functions": "lockSetFunctions", - }, -) - -go_library( - name = "lock", - srcs = [ - "lock.go", - "lock_range.go", - "lock_set.go", - "lock_set_functions.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/lock", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/log", - "//pkg/waiter", - ], -) - -go_test( - name = "lock_test", - size = "small", - srcs = [ - "lock_range_test.go", - "lock_test.go", - ], - embed = [":lock"], -) diff --git a/pkg/sentry/fs/lock/lock_range.go b/pkg/sentry/fs/lock/lock_range.go new file mode 100755 index 000000000..7a6f77640 --- /dev/null +++ b/pkg/sentry/fs/lock/lock_range.go @@ -0,0 +1,62 @@ +package lock + +// A Range represents a contiguous range of T. +// +// +stateify savable +type LockRange struct { + // Start is the inclusive start of the range. + Start uint64 + + // End is the exclusive end of the range. + End uint64 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +func (r LockRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +func (r LockRange) Length() uint64 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +func (r LockRange) Contains(x uint64) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +func (r LockRange) Overlaps(r2 LockRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +func (r LockRange) IsSupersetOf(r2 LockRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +func (r LockRange) Intersect(r2 LockRange) LockRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +func (r LockRange) CanSplitAt(x uint64) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/fs/lock/lock_range_test.go b/pkg/sentry/fs/lock/lock_range_test.go deleted file mode 100644 index 6221199d1..000000000 --- a/pkg/sentry/fs/lock/lock_range_test.go +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package lock - -import ( - "syscall" - "testing" -) - -func TestComputeRange(t *testing.T) { - tests := []struct { - // Description of test. - name string - - // Requested start of the lock range. - start int64 - - // Requested length of the lock range, - // can be negative :( - length int64 - - // Pre-computed file offset based on whence. - // Will be added to start. - offset int64 - - // Expected error. - err error - - // If error is nil, the expected LockRange. - LockRange - }{ - { - name: "offset, start, and length all zero", - LockRange: LockRange{Start: 0, End: LockEOF}, - }, - { - name: "zero offset, zero start, positive length", - start: 0, - length: 4096, - offset: 0, - LockRange: LockRange{Start: 0, End: 4096}, - }, - { - name: "zero offset, negative start", - start: -4096, - offset: 0, - err: syscall.EINVAL, - }, - { - name: "large offset, negative start, positive length", - start: -2048, - length: 2048, - offset: 4096, - LockRange: LockRange{Start: 2048, End: 4096}, - }, - { - name: "large offset, negative start, zero length", - start: -2048, - length: 0, - offset: 4096, - LockRange: LockRange{Start: 2048, End: LockEOF}, - }, - { - name: "zero offset, zero start, negative length", - start: 0, - length: -4096, - offset: 0, - err: syscall.EINVAL, - }, - { - name: "large offset, zero start, negative length", - start: 0, - length: -4096, - offset: 4096, - LockRange: LockRange{Start: 0, End: 4096}, - }, - { - name: "offset, start, and length equal, length is negative", - start: 1024, - length: -1024, - offset: 1024, - LockRange: LockRange{Start: 1024, End: 2048}, - }, - { - name: "offset, start, and length equal, start is negative", - start: -1024, - length: 1024, - offset: 1024, - LockRange: LockRange{Start: 0, End: 1024}, - }, - { - name: "offset, start, and length equal, offset is negative", - start: 1024, - length: 1024, - offset: -1024, - LockRange: LockRange{Start: 0, End: 1024}, - }, - { - name: "offset, start, and length equal, all negative", - start: -1024, - length: -1024, - offset: -1024, - err: syscall.EINVAL, - }, - { - name: "offset, start, and length equal, all positive", - start: 1024, - length: 1024, - offset: 1024, - LockRange: LockRange{Start: 2048, End: 3072}, - }, - } - - for _, test := range tests { - rng, err := ComputeRange(test.start, test.length, test.offset) - if err != test.err { - t.Errorf("%s: lockRange(%d, %d, %d) got error %v, want %v", test.name, test.start, test.length, test.offset, err, test.err) - continue - } - if err == nil && rng != test.LockRange { - t.Errorf("%s: lockRange(%d, %d, %d) got LockRange %v, want %v", test.name, test.start, test.length, test.offset, rng, test.LockRange) - } - } -} diff --git a/pkg/sentry/fs/lock/lock_set.go b/pkg/sentry/fs/lock/lock_set.go new file mode 100755 index 000000000..2343ca0b4 --- /dev/null +++ b/pkg/sentry/fs/lock/lock_set.go @@ -0,0 +1,1270 @@ +package lock + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + LockminDegree = 3 + + LockmaxDegree = 2 * LockminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type LockSet struct { + root Locknode `state:".(*LockSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *LockSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *LockSet) IsEmptyRange(r LockRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *LockSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *LockSet) SpanRange(r LockRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *LockSet) FirstSegment() LockIterator { + if s.root.nrSegments == 0 { + return LockIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *LockSet) LastSegment() LockIterator { + if s.root.nrSegments == 0 { + return LockIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *LockSet) FirstGap() LockGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return LockGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *LockSet) LastGap() LockGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return LockGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *LockSet) Find(key uint64) (LockIterator, LockGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return LockIterator{n, i}, LockGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return LockIterator{}, LockGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *LockSet) FindSegment(key uint64) LockIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *LockSet) LowerBoundSegment(min uint64) LockIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *LockSet) UpperBoundSegment(max uint64) LockIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *LockSet) FindGap(key uint64) LockGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *LockSet) LowerBoundGap(min uint64) LockGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *LockSet) UpperBoundGap(max uint64) LockGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *LockSet) Add(r LockRange, val Lock) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *LockSet) AddWithoutMerging(r LockRange, val Lock) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *LockSet) Insert(gap LockGapIterator, r LockRange, val Lock) LockIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (lockSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (lockSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (lockSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *LockSet) InsertWithoutMerging(gap LockGapIterator, r LockRange, val Lock) LockIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *LockSet) InsertWithoutMergingUnchecked(gap LockGapIterator, r LockRange, val Lock) LockIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return LockIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *LockSet) Remove(seg LockIterator) LockGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + lockSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(LockGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *LockSet) RemoveAll() { + s.root = Locknode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *LockSet) RemoveRange(r LockRange) LockGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *LockSet) Merge(first, second LockIterator) LockIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *LockSet) MergeUnchecked(first, second LockIterator) LockIterator { + if first.End() == second.Start() { + if mval, ok := (lockSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return LockIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *LockSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *LockSet) MergeRange(r LockRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *LockSet) MergeAdjacent(r LockRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *LockSet) Split(seg LockIterator, split uint64) (LockIterator, LockIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *LockSet) SplitUnchecked(seg LockIterator, split uint64) (LockIterator, LockIterator) { + val1, val2 := (lockSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), LockRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *LockSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *LockSet) Isolate(seg LockIterator, r LockRange) LockIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *LockSet) ApplyContiguous(r LockRange, fn func(seg LockIterator)) LockGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return LockGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return LockGapIterator{} + } + } +} + +// +stateify savable +type Locknode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *Locknode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [LockmaxDegree - 1]LockRange + values [LockmaxDegree - 1]Lock + children [LockmaxDegree]*Locknode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Locknode) firstSegment() LockIterator { + for n.hasChildren { + n = n.children[0] + } + return LockIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Locknode) lastSegment() LockIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return LockIterator{n, n.nrSegments - 1} +} + +func (n *Locknode) prevSibling() *Locknode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *Locknode) nextSibling() *Locknode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *Locknode) rebalanceBeforeInsert(gap LockGapIterator) LockGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < LockmaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &Locknode{ + nrSegments: LockminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &Locknode{ + nrSegments: LockminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:LockminDegree-1], n.keys[:LockminDegree-1]) + copy(left.values[:LockminDegree-1], n.values[:LockminDegree-1]) + copy(right.keys[:LockminDegree-1], n.keys[LockminDegree:]) + copy(right.values[:LockminDegree-1], n.values[LockminDegree:]) + n.keys[0], n.values[0] = n.keys[LockminDegree-1], n.values[LockminDegree-1] + LockzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:LockminDegree], n.children[:LockminDegree]) + copy(right.children[:LockminDegree], n.children[LockminDegree:]) + LockzeroNodeSlice(n.children[2:]) + for i := 0; i < LockminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < LockminDegree { + return LockGapIterator{left, gap.index} + } + return LockGapIterator{right, gap.index - LockminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[LockminDegree-1], n.values[LockminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &Locknode{ + nrSegments: LockminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:LockminDegree-1], n.keys[LockminDegree:]) + copy(sibling.values[:LockminDegree-1], n.values[LockminDegree:]) + LockzeroValueSlice(n.values[LockminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:LockminDegree], n.children[LockminDegree:]) + LockzeroNodeSlice(n.children[LockminDegree:]) + for i := 0; i < LockminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = LockminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < LockminDegree { + return gap + } + return LockGapIterator{sibling, gap.index - LockminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *Locknode) rebalanceAfterRemove(gap LockGapIterator) LockGapIterator { + for { + if n.nrSegments >= LockminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= LockminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + lockSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return LockGapIterator{n, 0} + } + if gap.node == n { + return LockGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= LockminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + lockSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return LockGapIterator{n, n.nrSegments} + } + return LockGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return LockGapIterator{p, gap.index} + } + if gap.node == right { + return LockGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *Locknode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = LockGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + lockSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type LockIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *Locknode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg LockIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg LockIterator) Range() LockRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg LockIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg LockIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg LockIterator) SetRangeUnchecked(r LockRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg LockIterator) SetRange(r LockRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg LockIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg LockIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg LockIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg LockIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg LockIterator) Value() Lock { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg LockIterator) ValuePtr() *Lock { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg LockIterator) SetValue(val Lock) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg LockIterator) PrevSegment() LockIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return LockIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return LockIterator{} + } + return LocksegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg LockIterator) NextSegment() LockIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return LockIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return LockIterator{} + } + return LocksegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg LockIterator) PrevGap() LockGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return LockGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg LockIterator) NextGap() LockGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return LockGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg LockIterator) PrevNonEmpty() (LockIterator, LockGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return LockIterator{}, gap + } + return gap.PrevSegment(), LockGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg LockIterator) NextNonEmpty() (LockIterator, LockGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return LockIterator{}, gap + } + return gap.NextSegment(), LockGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type LockGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *Locknode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap LockGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap LockGapIterator) Range() LockRange { + return LockRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap LockGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return lockSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap LockGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return lockSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap LockGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap LockGapIterator) PrevSegment() LockIterator { + return LocksegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap LockGapIterator) NextSegment() LockIterator { + return LocksegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap LockGapIterator) PrevGap() LockGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return LockGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap LockGapIterator) NextGap() LockGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return LockGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func LocksegmentBeforePosition(n *Locknode, i int) LockIterator { + for i == 0 { + if n.parent == nil { + return LockIterator{} + } + n, i = n.parent, n.parentIndex + } + return LockIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func LocksegmentAfterPosition(n *Locknode, i int) LockIterator { + for i == n.nrSegments { + if n.parent == nil { + return LockIterator{} + } + n, i = n.parent, n.parentIndex + } + return LockIterator{n, i} +} + +func LockzeroValueSlice(slice []Lock) { + + for i := range slice { + lockSetFunctions{}.ClearValue(&slice[i]) + } +} + +func LockzeroNodeSlice(slice []*Locknode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *LockSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *Locknode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *Locknode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type LockSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []Lock +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *LockSet) ExportSortedSlices() *LockSegmentDataSlices { + var sds LockSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *LockSet) ImportSortedSlices(sds *LockSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := LockRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *LockSet) saveRoot() *LockSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *LockSet) loadRoot(sds *LockSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/lock/lock_state_autogen.go b/pkg/sentry/fs/lock/lock_state_autogen.go new file mode 100755 index 000000000..cb69e2cd0 --- /dev/null +++ b/pkg/sentry/fs/lock/lock_state_autogen.go @@ -0,0 +1,106 @@ +// automatically generated by stateify. + +package lock + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *Lock) beforeSave() {} +func (x *Lock) save(m state.Map) { + x.beforeSave() + m.Save("Readers", &x.Readers) + m.Save("HasWriter", &x.HasWriter) + m.Save("Writer", &x.Writer) +} + +func (x *Lock) afterLoad() {} +func (x *Lock) load(m state.Map) { + m.Load("Readers", &x.Readers) + m.Load("HasWriter", &x.HasWriter) + m.Load("Writer", &x.Writer) +} + +func (x *Locks) beforeSave() {} +func (x *Locks) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.blockedQueue) { m.Failf("blockedQueue is %v, expected zero", x.blockedQueue) } + m.Save("locks", &x.locks) +} + +func (x *Locks) afterLoad() {} +func (x *Locks) load(m state.Map) { + m.Load("locks", &x.locks) +} + +func (x *LockRange) beforeSave() {} +func (x *LockRange) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) +} + +func (x *LockRange) afterLoad() {} +func (x *LockRange) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) +} + +func (x *LockSet) beforeSave() {} +func (x *LockSet) save(m state.Map) { + x.beforeSave() + var root *LockSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *LockSet) afterLoad() {} +func (x *LockSet) load(m state.Map) { + m.LoadValue("root", new(*LockSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*LockSegmentDataSlices)) }) +} + +func (x *Locknode) beforeSave() {} +func (x *Locknode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *Locknode) afterLoad() {} +func (x *Locknode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *LockSegmentDataSlices) beforeSave() {} +func (x *LockSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *LockSegmentDataSlices) afterLoad() {} +func (x *LockSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func init() { + state.Register("lock.Lock", (*Lock)(nil), state.Fns{Save: (*Lock).save, Load: (*Lock).load}) + state.Register("lock.Locks", (*Locks)(nil), state.Fns{Save: (*Locks).save, Load: (*Locks).load}) + state.Register("lock.LockRange", (*LockRange)(nil), state.Fns{Save: (*LockRange).save, Load: (*LockRange).load}) + state.Register("lock.LockSet", (*LockSet)(nil), state.Fns{Save: (*LockSet).save, Load: (*LockSet).load}) + state.Register("lock.Locknode", (*Locknode)(nil), state.Fns{Save: (*Locknode).save, Load: (*Locknode).load}) + state.Register("lock.LockSegmentDataSlices", (*LockSegmentDataSlices)(nil), state.Fns{Save: (*LockSegmentDataSlices).save, Load: (*LockSegmentDataSlices).load}) +} diff --git a/pkg/sentry/fs/lock/lock_test.go b/pkg/sentry/fs/lock/lock_test.go deleted file mode 100644 index ba002aeb7..000000000 --- a/pkg/sentry/fs/lock/lock_test.go +++ /dev/null @@ -1,1059 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package lock - -import ( - "reflect" - "testing" -) - -type entry struct { - Lock - LockRange -} - -func equals(e0, e1 []entry) bool { - if len(e0) != len(e1) { - return false - } - for i := range e0 { - for k := range e0[i].Lock.Readers { - if !e1[i].Lock.Readers[k] { - return false - } - } - for k := range e1[i].Lock.Readers { - if !e0[i].Lock.Readers[k] { - return false - } - } - if !reflect.DeepEqual(e0[i].LockRange, e1[i].LockRange) { - return false - } - if e0[i].Lock.HasWriter != e1[i].Lock.HasWriter { - return false - } - if e0[i].Lock.Writer != e1[i].Lock.Writer { - return false - } - } - return true -} - -// fill a LockSet with consecutive region locks. Will panic if -// LockRanges are not consecutive. -func fill(entries []entry) LockSet { - l := LockSet{} - for _, e := range entries { - gap := l.FindGap(e.LockRange.Start) - if !gap.Ok() { - panic("cannot insert into existing segment") - } - l.Insert(gap, e.LockRange, e.Lock) - } - return l -} - -func TestCanLockEmpty(t *testing.T) { - l := LockSet{} - - // Expect to be able to take any locks given that the set is empty. - eof := l.FirstGap().End() - r := LockRange{0, eof} - if !l.canLock(1, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 1) - } - if !l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 2) - } - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } - if !l.canLock(2, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 2) - } -} - -func TestCanLock(t *testing.T) { - // + -------------- + ---------- + -------------- + --------- + - // | Readers 1 & 2 | Readers 1 | Readers 1 & 3 | Writer 1 | - // + ------------- + ---------- + -------------- + --------- + - // 0 1024 2048 3072 4096 - l := fill([]entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{1: true}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 3: true}}, - LockRange: LockRange{2048, 3072}, - }, - { - Lock: Lock{HasWriter: true, Writer: 1}, - LockRange: LockRange{3072, 4096}, - }, - }) - - // Now that we have a mildly interesting layout, try some checks on different - // ranges, uids, and lock types. - // - // Expect to be able to extend the read lock, despite the writer lock, because - // the writer has the same uid as the requested read lock. - r := LockRange{0, 8192} - if !l.canLock(1, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 1) - } - // Expect to *not* be able to extend the read lock since there is an overlapping - // writer region locked by someone other than the uid. - if l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got true, want false", ReadLock, r, 2) - } - // Expect to be able to extend the read lock if there are only other readers in - // the way. - r = LockRange{64, 3072} - if !l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 2) - } - // Expect to be able to set a read lock beyond the range of any existing locks. - r = LockRange{4096, 10240} - if !l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 2) - } - - // Expect to not be able to take a write lock with other readers in the way. - r = LockRange{0, 8192} - if l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got true, want false", WriteLock, r, 1) - } - // Expect to be able to extend the write lock for the same uid. - r = LockRange{3072, 8192} - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } - // Expect to not be able to overlap a write lock for two different uids. - if l.canLock(2, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got true, want false", WriteLock, r, 2) - } - // Expect to be able to set a write lock that is beyond the range of any - // existing locks. - r = LockRange{8192, 10240} - if !l.canLock(2, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 2) - } - // Expect to be able to upgrade a read lock (any portion of it). - r = LockRange{1024, 2048} - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } - r = LockRange{1080, 2000} - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } -} - -func TestSetLock(t *testing.T) { - tests := []struct { - // description of test. - name string - - // LockSet entries to pre-fill. - before []entry - - // Description of region to lock: - // - // start is the file offset of the lock. - start uint64 - // end is the end file offset of the lock. - end uint64 - // uid of lock attempter. - uid UniqueID - // lock type requested. - lockType LockType - - // success is true if taking the above - // lock should succeed. - success bool - - // Expected layout of the set after locking - // if success is true. - after []entry - }{ - { - name: "set zero length ReadLock on empty set", - start: 0, - end: 0, - uid: 0, - lockType: ReadLock, - success: true, - }, - { - name: "set zero length WriteLock on empty set", - start: 0, - end: 0, - uid: 0, - lockType: WriteLock, - success: true, - }, - { - name: "set ReadLock on empty set", - start: 0, - end: LockEOF, - uid: 0, - lockType: ReadLock, - success: true, - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "set WriteLock on empty set", - start: 0, - end: LockEOF, - uid: 0, - lockType: WriteLock, - success: true, - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "set ReadLock on WriteLock same uid", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - lockType: ReadLock, - success: true, - // + ----------- + --------------------------- + - // | Readers 0 | Writer 0 | - // + ----------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "set WriteLock on ReadLock same uid", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - lockType: WriteLock, - success: true, - // + ----------- + --------------------------- + - // | Writer 0 | Readers 0 | - // + ----------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "set ReadLock on WriteLock different uid", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: ReadLock, - success: false, - }, - { - name: "set WriteLock on ReadLock different uid", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: WriteLock, - success: false, - }, - { - name: "split ReadLock for overlapping lock at start 0", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: ReadLock, - success: true, - // + -------------- + --------------------------- + - // | Readers 0 & 1 | Readers 0 | - // + -------------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "split ReadLock for overlapping lock at non-zero start", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 4096, - end: 8192, - uid: 1, - lockType: ReadLock, - success: true, - // + ---------- + -------------- + ----------- + - // | Readers 0 | Readers 0 & 1 | Readers 0 | - // + ---------- + -------------- + ----------- + - // 0 4096 8192 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{4096, 8192}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{8192, LockEOF}, - }, - }, - }, - { - name: "fill front gap with ReadLock", - // + --------- + ---------------------------- + - // | gap | Readers 0 | - // + --------- + ---------------------------- + - // 0 1024 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - start: 0, - end: 8192, - uid: 0, - lockType: ReadLock, - success: true, - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "fill end gap with ReadLock", - // + ---------------------------- + - // | Readers 0 | - // + ---------------------------- + - // 0 4096 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, 4096}, - }, - }, - start: 1024, - end: LockEOF, - uid: 0, - lockType: ReadLock, - success: true, - // Note that this is not merged after lock does a Split. This is - // fine because the two locks will still *behave* as one. In other - // words we can fragment any lock all we want and semantically it - // makes no difference. - // - // + ----------- + --------------------------- + - // | Readers 0 | Readers 0 | - // + ----------- + --------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - }, - { - name: "fill gap with ReadLock and split", - // + --------- + ---------------------------- + - // | gap | Readers 0 | - // + --------- + ---------------------------- + - // 0 1024 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: ReadLock, - success: true, - // + --------- + ------------- + ------------- + - // | Reader 1 | Readers 0 & 1 | Reader 0 | - // + ----------+ ------------- + ------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "upgrade ReadLock to WriteLock for single uid fill gap", - // + ------------- + --------- + --- + ------------- + - // | Readers 0 & 1 | Readers 0 | gap | Readers 0 & 2 | - // + ------------- + --------- + --- + ------------- + - // 0 1024 2048 4096 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 1024, - end: 4096, - uid: 0, - lockType: WriteLock, - success: true, - // + ------------- + -------- + ------------- + - // | Readers 0 & 1 | Writer 0 | Readers 0 & 2 | - // + ------------- + -------- + ------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "upgrade ReadLock to WriteLock for single uid keep gap", - // + ------------- + --------- + --- + ------------- + - // | Readers 0 & 1 | Readers 0 | gap | Readers 0 & 2 | - // + ------------- + --------- + --- + ------------- + - // 0 1024 2048 4096 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 1024, - end: 3072, - uid: 0, - lockType: WriteLock, - success: true, - // + ------------- + -------- + --- + ------------- + - // | Readers 0 & 1 | Writer 0 | gap | Readers 0 & 2 | - // + ------------- + -------- + --- + ------------- + - // 0 1024 3072 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{1024, 3072}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "fail to upgrade ReadLock to WriteLock with conflicting Reader", - // + ------------- + --------- + - // | Readers 0 & 1 | Readers 0 | - // + ------------- + --------- + - // 0 1024 2048 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, 2048}, - }, - }, - start: 0, - end: 2048, - uid: 0, - lockType: WriteLock, - success: false, - }, - { - name: "take WriteLock on whole file if all uids are the same", - // + ------------- + --------- + --------- + ---------- + - // | Writer 0 | Readers 0 | Readers 0 | Readers 0 | - // + ------------- + --------- + --------- + ---------- + - // 0 1024 2048 4096 max uint64 - before: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{2048, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - lockType: WriteLock, - success: true, - // We do not manually merge locks. Semantically a fragmented lock - // held by the same uid will behave as one lock so it makes no difference. - // - // + ------------- + ---------------------------- + - // | Writer 0 | Writer 0 | - // + ------------- + ---------------------------- + - // 0 1024 max uint64 - after: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - }, - } - - for _, test := range tests { - l := fill(test.before) - - r := LockRange{Start: test.start, End: test.end} - success := l.lock(test.uid, test.lockType, r) - var got []entry - for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - got = append(got, entry{ - Lock: seg.Value(), - LockRange: seg.Range(), - }) - } - - if success != test.success { - t.Errorf("%s: setlock(%v, %+v, %d, %d) got success %v, want %v", test.name, test.before, r, test.uid, test.lockType, success, test.success) - continue - } - - if success { - if !equals(got, test.after) { - t.Errorf("%s: got set %+v, want %+v", test.name, got, test.after) - } - } - } -} - -func TestUnlock(t *testing.T) { - tests := []struct { - // description of test. - name string - - // LockSet entries to pre-fill. - before []entry - - // Description of region to unlock: - // - // start is the file start of the lock. - start uint64 - // end is the end file start of the lock. - end uint64 - // uid of lock holder. - uid UniqueID - - // Expected layout of the set after unlocking. - after []entry - }{ - { - name: "unlock zero length on empty set", - start: 0, - end: 0, - uid: 0, - }, - { - name: "unlock on empty set (no-op)", - start: 0, - end: LockEOF, - uid: 0, - }, - { - name: "unlock uid not locked (no-op)", - // + --------------------------- + - // | Readers 1 & 2 | - // + --------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 1024, - end: 4096, - uid: 0, - // + --------------------------- + - // | Readers 1 & 2 | - // + --------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "unlock ReadLock over entire file", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - }, - { - name: "unlock WriteLock over entire file", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - }, - { - name: "unlock partial ReadLock (start)", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - // + ------ + --------------------------- + - // | gap | Readers 0 | - // +------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock partial WriteLock (start)", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - // + ------ + --------------------------- + - // | gap | Writer 0 | - // +------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock partial ReadLock (end)", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 4096, - end: LockEOF, - uid: 0, - // + --------------------------- + - // | Readers 0 | - // +---------------------------- + - // 0 4096 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, 4096}, - }, - }, - }, - { - name: "unlock partial WriteLock (end)", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 4096, - end: LockEOF, - uid: 0, - // + --------------------------- + - // | Writer 0 | - // +---------------------------- + - // 0 4096 - after: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, 4096}, - }, - }, - }, - { - name: "unlock for single uid", - // + ------------- + --------- + ------------------- + - // | Readers 0 & 1 | Writer 0 | Readers 0 & 1 & 2 | - // + ------------- + --------- + ------------------- + - // 0 1024 4096 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - // + --------- + --- + --------------- + - // | Readers 1 | gap | Readers 1 & 2 | - // + --------- + --- + --------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock subsection locked", - // + ------------------------------- + - // | Readers 0 & 1 & 2 | - // + ------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 1024, - end: 4096, - uid: 0, - // + ----------------- + ------------- + ----------------- + - // | Readers 0 & 1 & 2 | Readers 1 & 2 | Readers 0 & 1 & 2 | - // + ----------------- + ------------- + ----------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock mid-gap to increase gap", - // + --------- + ----- + ------------------- + - // | Writer 0 | gap | Readers 0 & 1 | - // + --------- + ----- + ------------------- + - // 0 1024 4096 max uint64 - before: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 8, - end: 2048, - uid: 0, - // + --------- + ----- + ------------------- + - // | Writer 0 | gap | Readers 0 & 1 | - // + --------- + ----- + ------------------- + - // 0 8 4096 max uint64 - after: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, 8}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock split region on uid mid-gap", - // + --------- + ----- + ------------------- + - // | Writer 0 | gap | Readers 0 & 1 | - // + --------- + ----- + ------------------- + - // 0 1024 4096 max uint64 - before: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 2048, - end: 8192, - uid: 0, - // + --------- + ----- + --------- + ------------- + - // | Writer 0 | gap | Readers 1 | Readers 0 & 1 | - // + --------- + ----- + --------- + ------------- + - // 0 1024 4096 8192 max uint64 - after: []entry{ - { - Lock: Lock{HasWriter: true, Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{1: true}}, - LockRange: LockRange{4096, 8192}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{8192, LockEOF}, - }, - }, - }, - } - - for _, test := range tests { - l := fill(test.before) - - r := LockRange{Start: test.start, End: test.end} - l.unlock(test.uid, r) - var got []entry - for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - got = append(got, entry{ - Lock: seg.Value(), - LockRange: seg.Range(), - }) - } - if !equals(got, test.after) { - t.Errorf("%s: got set %+v, want %+v", test.name, got, test.after) - } - } -} diff --git a/pkg/sentry/fs/mount_test.go b/pkg/sentry/fs/mount_test.go deleted file mode 100644 index 0b84732aa..000000000 --- a/pkg/sentry/fs/mount_test.go +++ /dev/null @@ -1,271 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "fmt" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" -) - -// cacheReallyContains iterates through the dirent cache to determine whether -// it contains the given dirent. -func cacheReallyContains(cache *DirentCache, d *Dirent) bool { - for i := cache.list.Front(); i != nil; i = i.Next() { - if i == d { - return true - } - } - return false -} - -func mountPathsAre(root *Dirent, got []*Mount, want ...string) error { - gotPaths := make(map[string]struct{}, len(got)) - gotStr := make([]string, len(got)) - for i, g := range got { - groot := g.Root() - name, _ := groot.FullName(root) - groot.DecRef() - gotStr[i] = name - gotPaths[name] = struct{}{} - } - if len(got) != len(want) { - return fmt.Errorf("mount paths are different, got: %q, want: %q", gotStr, want) - } - for _, w := range want { - if _, ok := gotPaths[w]; !ok { - return fmt.Errorf("no mount with path %q found", w) - } - } - return nil -} - -// TestMountSourceOnlyCachedOnce tests that a Dirent that is mounted over only ends -// up in a single Dirent Cache. NOTE(b/63848693): Having a dirent in multiple -// caches causes major consistency issues. -func TestMountSourceOnlyCachedOnce(t *testing.T) { - ctx := contexttest.Context(t) - - rootCache := NewDirentCache(100) - rootInode := NewMockInode(ctx, NewMockMountSource(rootCache), StableAttr{ - Type: Directory, - }) - mm, err := NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - rootDirent := mm.Root() - defer rootDirent.DecRef() - - // Get a child of the root which we will mount over. Note that the - // MockInodeOperations causes Walk to always succeed. - child, err := rootDirent.Walk(ctx, rootDirent, "child") - if err != nil { - t.Fatalf("failed to walk to child dirent: %v", err) - } - child.maybeExtendReference() // Cache. - - // Ensure that the root cache contains the child. - if !cacheReallyContains(rootCache, child) { - t.Errorf("wanted rootCache to contain child dirent, but it did not") - } - - // Create a new cache and inode, and mount it over child. - submountCache := NewDirentCache(100) - submountInode := NewMockInode(ctx, NewMockMountSource(submountCache), StableAttr{ - Type: Directory, - }) - if err := mm.Mount(ctx, child, submountInode); err != nil { - t.Fatalf("failed to mount over child: %v", err) - } - - // Walk to the child again. - child2, err := rootDirent.Walk(ctx, rootDirent, "child") - if err != nil { - t.Fatalf("failed to walk to child dirent: %v", err) - } - - // Should have a different Dirent than before. - if child == child2 { - t.Fatalf("expected %v not equal to %v, but they are the same", child, child2) - } - - // Neither of the caches should no contain the child. - if cacheReallyContains(rootCache, child) { - t.Errorf("wanted rootCache not to contain child dirent, but it did") - } - if cacheReallyContains(submountCache, child) { - t.Errorf("wanted submountCache not to contain child dirent, but it did") - } -} - -func TestAllMountsUnder(t *testing.T) { - ctx := contexttest.Context(t) - - rootCache := NewDirentCache(100) - rootInode := NewMockInode(ctx, NewMockMountSource(rootCache), StableAttr{ - Type: Directory, - }) - mm, err := NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - rootDirent := mm.Root() - defer rootDirent.DecRef() - - // Add mounts at the following paths: - paths := []string{ - "/foo", - "/foo/bar", - "/foo/bar/baz", - "/foo/qux", - "/waldo", - } - - var maxTraversals uint - for _, p := range paths { - maxTraversals = 0 - d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", p, err) - } - - submountInode := NewMockInode(ctx, NewMockMountSource(nil), StableAttr{ - Type: Directory, - }) - if err := mm.Mount(ctx, d, submountInode); err != nil { - t.Fatalf("could not mount at %q: %v", p, err) - } - d.DecRef() - } - - // mm root should contain all submounts (and does not include the root mount). - rootMnt := mm.FindMount(rootDirent) - submounts := mm.AllMountsUnder(rootMnt) - allPaths := append(paths, "/") - if err := mountPathsAre(rootDirent, submounts, allPaths...); err != nil { - t.Error(err) - } - - // Each mount should have a unique ID. - foundIDs := make(map[uint64]struct{}) - for _, m := range submounts { - if _, ok := foundIDs[m.ID]; ok { - t.Errorf("got multiple mounts with id %d", m.ID) - } - foundIDs[m.ID] = struct{}{} - } - - // Root mount should have no parent. - if p := rootMnt.ParentID; p != invalidMountID { - t.Errorf("root.Parent got %v wanted nil", p) - } - - // Check that "foo" mount has 3 children. - maxTraversals = 0 - d, err := mm.FindLink(ctx, rootDirent, nil, "/foo", &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", "/foo", err) - } - defer d.DecRef() - submounts = mm.AllMountsUnder(mm.FindMount(d)) - if err := mountPathsAre(rootDirent, submounts, "/foo", "/foo/bar", "/foo/qux", "/foo/bar/baz"); err != nil { - t.Error(err) - } - - // "waldo" mount should have no children. - maxTraversals = 0 - waldo, err := mm.FindLink(ctx, rootDirent, nil, "/waldo", &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", "/waldo", err) - } - defer waldo.DecRef() - submounts = mm.AllMountsUnder(mm.FindMount(waldo)) - if err := mountPathsAre(rootDirent, submounts, "/waldo"); err != nil { - t.Error(err) - } -} - -func TestUnmount(t *testing.T) { - ctx := contexttest.Context(t) - - rootCache := NewDirentCache(100) - rootInode := NewMockInode(ctx, NewMockMountSource(rootCache), StableAttr{ - Type: Directory, - }) - mm, err := NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - rootDirent := mm.Root() - defer rootDirent.DecRef() - - // Add mounts at the following paths: - paths := []string{ - "/foo", - "/foo/bar", - "/foo/bar/goo", - "/foo/bar/goo/abc", - "/foo/abc", - "/foo/def", - "/waldo", - "/wally", - } - - var maxTraversals uint - for _, p := range paths { - maxTraversals = 0 - d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", p, err) - } - - submountInode := NewMockInode(ctx, NewMockMountSource(nil), StableAttr{ - Type: Directory, - }) - if err := mm.Mount(ctx, d, submountInode); err != nil { - t.Fatalf("could not mount at %q: %v", p, err) - } - d.DecRef() - } - - allPaths := make([]string, len(paths)+1) - allPaths[0] = "/" - copy(allPaths[1:], paths) - - rootMnt := mm.FindMount(rootDirent) - for i := len(paths) - 1; i >= 0; i-- { - maxTraversals = 0 - p := paths[i] - d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", p, err) - } - - if err := mm.Unmount(ctx, d, false); err != nil { - t.Fatalf("could not unmount at %q: %v", p, err) - } - d.DecRef() - - // Remove the path that has been unmounted and the check that the remaining - // mounts are still there. - allPaths = allPaths[:len(allPaths)-1] - submounts := mm.AllMountsUnder(rootMnt) - if err := mountPathsAre(rootDirent, submounts, allPaths...); err != nil { - t.Error(err) - } - } -} diff --git a/pkg/sentry/fs/mounts_test.go b/pkg/sentry/fs/mounts_test.go deleted file mode 100644 index c4c771f2c..000000000 --- a/pkg/sentry/fs/mounts_test.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" -) - -// Creates a new MountNamespace with filesystem: -// / (root dir) -// |-foo (dir) -// |-bar (file) -func createMountNamespace(ctx context.Context) (*fs.MountNamespace, error) { - perms := fs.FilePermsFromMode(0777) - m := fs.NewPseudoMountSource(ctx) - - barFile := fsutil.NewSimpleFileInode(ctx, fs.RootOwner, perms, 0) - fooDir := ramfs.NewDir(ctx, map[string]*fs.Inode{ - "bar": fs.NewInode(ctx, barFile, m, fs.StableAttr{Type: fs.RegularFile}), - }, fs.RootOwner, perms) - rootDir := ramfs.NewDir(ctx, map[string]*fs.Inode{ - "foo": fs.NewInode(ctx, fooDir, m, fs.StableAttr{Type: fs.Directory}), - }, fs.RootOwner, perms) - - return fs.NewMountNamespace(ctx, fs.NewInode(ctx, rootDir, m, fs.StableAttr{Type: fs.Directory})) -} - -func TestFindLink(t *testing.T) { - ctx := contexttest.Context(t) - mm, err := createMountNamespace(ctx) - if err != nil { - t.Fatalf("createMountNamespace failed: %v", err) - } - - root := mm.Root() - defer root.DecRef() - foo, err := root.Walk(ctx, root, "foo") - if err != nil { - t.Fatalf("Error walking to foo: %v", err) - } - - // Positive cases. - for _, tc := range []struct { - findPath string - wd *fs.Dirent - wantPath string - }{ - {".", root, "/"}, - {".", foo, "/foo"}, - {"..", foo, "/"}, - {"../../..", foo, "/"}, - {"///foo", foo, "/foo"}, - {"/foo", foo, "/foo"}, - {"/foo/bar", foo, "/foo/bar"}, - {"/foo/.///./bar", foo, "/foo/bar"}, - {"/foo///bar", foo, "/foo/bar"}, - {"/foo/../foo/bar", foo, "/foo/bar"}, - {"foo/bar", root, "/foo/bar"}, - {"foo////bar", root, "/foo/bar"}, - {"bar", foo, "/foo/bar"}, - } { - wdPath, _ := tc.wd.FullName(root) - maxTraversals := uint(0) - if d, err := mm.FindLink(ctx, root, tc.wd, tc.findPath, &maxTraversals); err != nil { - t.Errorf("FindLink(%q, wd=%q) failed: %v", tc.findPath, wdPath, err) - } else if got, _ := d.FullName(root); got != tc.wantPath { - t.Errorf("FindLink(%q, wd=%q) got dirent %q, want %q", tc.findPath, wdPath, got, tc.wantPath) - } - } - - // Negative cases. - for _, tc := range []struct { - findPath string - wd *fs.Dirent - }{ - {"bar", root}, - {"/bar", root}, - {"/foo/../../bar", root}, - {"foo", foo}, - } { - wdPath, _ := tc.wd.FullName(root) - maxTraversals := uint(0) - if _, err := mm.FindLink(ctx, root, tc.wd, tc.findPath, &maxTraversals); err == nil { - t.Errorf("FindLink(%q, wd=%q) did not return error", tc.findPath, wdPath) - } - } -} diff --git a/pkg/sentry/fs/path_test.go b/pkg/sentry/fs/path_test.go deleted file mode 100644 index e6f57ebba..000000000 --- a/pkg/sentry/fs/path_test.go +++ /dev/null @@ -1,289 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "testing" -) - -// TestSplitLast tests variants of path splitting. -func TestSplitLast(t *testing.T) { - cases := []struct { - path string - dir string - file string - }{ - {path: "/", dir: "/", file: "."}, - {path: "/.", dir: "/", file: "."}, - {path: "/./", dir: "/", file: "."}, - {path: "/./.", dir: "/.", file: "."}, - {path: "/././", dir: "/.", file: "."}, - {path: "/./..", dir: "/.", file: ".."}, - {path: "/./../", dir: "/.", file: ".."}, - {path: "/..", dir: "/", file: ".."}, - {path: "/../", dir: "/", file: ".."}, - {path: "/../.", dir: "/..", file: "."}, - {path: "/.././", dir: "/..", file: "."}, - {path: "/../..", dir: "/..", file: ".."}, - {path: "/../../", dir: "/..", file: ".."}, - - {path: "", dir: ".", file: "."}, - {path: ".", dir: ".", file: "."}, - {path: "./", dir: ".", file: "."}, - {path: "./.", dir: ".", file: "."}, - {path: "././", dir: ".", file: "."}, - {path: "./..", dir: ".", file: ".."}, - {path: "./../", dir: ".", file: ".."}, - {path: "..", dir: ".", file: ".."}, - {path: "../", dir: ".", file: ".."}, - {path: "../.", dir: "..", file: "."}, - {path: ".././", dir: "..", file: "."}, - {path: "../..", dir: "..", file: ".."}, - {path: "../../", dir: "..", file: ".."}, - - {path: "/foo", dir: "/", file: "foo"}, - {path: "/foo/", dir: "/", file: "foo"}, - {path: "/foo/.", dir: "/foo", file: "."}, - {path: "/foo/./", dir: "/foo", file: "."}, - {path: "/foo/./.", dir: "/foo/.", file: "."}, - {path: "/foo/./..", dir: "/foo/.", file: ".."}, - {path: "/foo/..", dir: "/foo", file: ".."}, - {path: "/foo/../", dir: "/foo", file: ".."}, - {path: "/foo/../.", dir: "/foo/..", file: "."}, - {path: "/foo/../..", dir: "/foo/..", file: ".."}, - - {path: "/foo/bar", dir: "/foo", file: "bar"}, - {path: "/foo/bar/", dir: "/foo", file: "bar"}, - {path: "/foo/bar/.", dir: "/foo/bar", file: "."}, - {path: "/foo/bar/./", dir: "/foo/bar", file: "."}, - {path: "/foo/bar/./.", dir: "/foo/bar/.", file: "."}, - {path: "/foo/bar/./..", dir: "/foo/bar/.", file: ".."}, - {path: "/foo/bar/..", dir: "/foo/bar", file: ".."}, - {path: "/foo/bar/../", dir: "/foo/bar", file: ".."}, - {path: "/foo/bar/../.", dir: "/foo/bar/..", file: "."}, - {path: "/foo/bar/../..", dir: "/foo/bar/..", file: ".."}, - - {path: "foo", dir: ".", file: "foo"}, - {path: "foo", dir: ".", file: "foo"}, - {path: "foo/", dir: ".", file: "foo"}, - {path: "foo/.", dir: "foo", file: "."}, - {path: "foo/./", dir: "foo", file: "."}, - {path: "foo/./.", dir: "foo/.", file: "."}, - {path: "foo/./..", dir: "foo/.", file: ".."}, - {path: "foo/..", dir: "foo", file: ".."}, - {path: "foo/../", dir: "foo", file: ".."}, - {path: "foo/../.", dir: "foo/..", file: "."}, - {path: "foo/../..", dir: "foo/..", file: ".."}, - {path: "foo/", dir: ".", file: "foo"}, - {path: "foo/.", dir: "foo", file: "."}, - - {path: "foo/bar", dir: "foo", file: "bar"}, - {path: "foo/bar/", dir: "foo", file: "bar"}, - {path: "foo/bar/.", dir: "foo/bar", file: "."}, - {path: "foo/bar/./", dir: "foo/bar", file: "."}, - {path: "foo/bar/./.", dir: "foo/bar/.", file: "."}, - {path: "foo/bar/./..", dir: "foo/bar/.", file: ".."}, - {path: "foo/bar/..", dir: "foo/bar", file: ".."}, - {path: "foo/bar/../", dir: "foo/bar", file: ".."}, - {path: "foo/bar/../.", dir: "foo/bar/..", file: "."}, - {path: "foo/bar/../..", dir: "foo/bar/..", file: ".."}, - {path: "foo/bar/", dir: "foo", file: "bar"}, - {path: "foo/bar/.", dir: "foo/bar", file: "."}, - } - - for _, c := range cases { - dir, file := SplitLast(c.path) - if dir != c.dir || file != c.file { - t.Errorf("SplitLast(%q) got (%q, %q), expected (%q, %q)", c.path, dir, file, c.dir, c.file) - } - } -} - -// TestSplitFirst tests variants of path splitting. -func TestSplitFirst(t *testing.T) { - cases := []struct { - path string - first string - remainder string - }{ - {path: "/", first: "/", remainder: ""}, - {path: "/.", first: "/", remainder: "."}, - {path: "///.", first: "/", remainder: "//."}, - {path: "/.///", first: "/", remainder: "."}, - {path: "/./.", first: "/", remainder: "./."}, - {path: "/././", first: "/", remainder: "./."}, - {path: "/./..", first: "/", remainder: "./.."}, - {path: "/./../", first: "/", remainder: "./.."}, - {path: "/..", first: "/", remainder: ".."}, - {path: "/../", first: "/", remainder: ".."}, - {path: "/../.", first: "/", remainder: "../."}, - {path: "/.././", first: "/", remainder: "../."}, - {path: "/../..", first: "/", remainder: "../.."}, - {path: "/../../", first: "/", remainder: "../.."}, - - {path: "", first: ".", remainder: ""}, - {path: ".", first: ".", remainder: ""}, - {path: "./", first: ".", remainder: ""}, - {path: ".///", first: ".", remainder: ""}, - {path: "./.", first: ".", remainder: "."}, - {path: "././", first: ".", remainder: "."}, - {path: "./..", first: ".", remainder: ".."}, - {path: "./../", first: ".", remainder: ".."}, - {path: "..", first: "..", remainder: ""}, - {path: "../", first: "..", remainder: ""}, - {path: "../.", first: "..", remainder: "."}, - {path: ".././", first: "..", remainder: "."}, - {path: "../..", first: "..", remainder: ".."}, - {path: "../../", first: "..", remainder: ".."}, - - {path: "/foo", first: "/", remainder: "foo"}, - {path: "/foo/", first: "/", remainder: "foo"}, - {path: "/foo///", first: "/", remainder: "foo"}, - {path: "/foo/.", first: "/", remainder: "foo/."}, - {path: "/foo/./", first: "/", remainder: "foo/."}, - {path: "/foo/./.", first: "/", remainder: "foo/./."}, - {path: "/foo/./..", first: "/", remainder: "foo/./.."}, - {path: "/foo/..", first: "/", remainder: "foo/.."}, - {path: "/foo/../", first: "/", remainder: "foo/.."}, - {path: "/foo/../.", first: "/", remainder: "foo/../."}, - {path: "/foo/../..", first: "/", remainder: "foo/../.."}, - - {path: "/foo/bar", first: "/", remainder: "foo/bar"}, - {path: "///foo/bar", first: "/", remainder: "//foo/bar"}, - {path: "/foo///bar", first: "/", remainder: "foo///bar"}, - {path: "/foo/bar/.", first: "/", remainder: "foo/bar/."}, - {path: "/foo/bar/./", first: "/", remainder: "foo/bar/."}, - {path: "/foo/bar/./.", first: "/", remainder: "foo/bar/./."}, - {path: "/foo/bar/./..", first: "/", remainder: "foo/bar/./.."}, - {path: "/foo/bar/..", first: "/", remainder: "foo/bar/.."}, - {path: "/foo/bar/../", first: "/", remainder: "foo/bar/.."}, - {path: "/foo/bar/../.", first: "/", remainder: "foo/bar/../."}, - {path: "/foo/bar/../..", first: "/", remainder: "foo/bar/../.."}, - - {path: "foo", first: "foo", remainder: ""}, - {path: "foo", first: "foo", remainder: ""}, - {path: "foo/", first: "foo", remainder: ""}, - {path: "foo///", first: "foo", remainder: ""}, - {path: "foo/.", first: "foo", remainder: "."}, - {path: "foo/./", first: "foo", remainder: "."}, - {path: "foo/./.", first: "foo", remainder: "./."}, - {path: "foo/./..", first: "foo", remainder: "./.."}, - {path: "foo/..", first: "foo", remainder: ".."}, - {path: "foo/../", first: "foo", remainder: ".."}, - {path: "foo/../.", first: "foo", remainder: "../."}, - {path: "foo/../..", first: "foo", remainder: "../.."}, - {path: "foo/", first: "foo", remainder: ""}, - {path: "foo/.", first: "foo", remainder: "."}, - - {path: "foo/bar", first: "foo", remainder: "bar"}, - {path: "foo///bar", first: "foo", remainder: "bar"}, - {path: "foo/bar/", first: "foo", remainder: "bar"}, - {path: "foo/bar/.", first: "foo", remainder: "bar/."}, - {path: "foo/bar/./", first: "foo", remainder: "bar/."}, - {path: "foo/bar/./.", first: "foo", remainder: "bar/./."}, - {path: "foo/bar/./..", first: "foo", remainder: "bar/./.."}, - {path: "foo/bar/..", first: "foo", remainder: "bar/.."}, - {path: "foo/bar/../", first: "foo", remainder: "bar/.."}, - {path: "foo/bar/../.", first: "foo", remainder: "bar/../."}, - {path: "foo/bar/../..", first: "foo", remainder: "bar/../.."}, - {path: "foo/bar/", first: "foo", remainder: "bar"}, - {path: "foo/bar/.", first: "foo", remainder: "bar/."}, - } - - for _, c := range cases { - first, remainder := SplitFirst(c.path) - if first != c.first || remainder != c.remainder { - t.Errorf("SplitFirst(%q) got (%q, %q), expected (%q, %q)", c.path, first, remainder, c.first, c.remainder) - } - } -} - -// TestIsSubpath tests the IsSubpath method. -func TestIsSubpath(t *testing.T) { - tcs := []struct { - // Two absolute paths. - pathA string - pathB string - - // Whether pathA is a subpath of pathB. - wantIsSubpath bool - - // Relative path from pathA to pathB. Only checked if - // wantIsSubpath is true. - wantRelpath string - }{ - { - pathA: "/foo/bar/baz", - pathB: "/foo", - wantIsSubpath: true, - wantRelpath: "bar/baz", - }, - { - pathA: "/foo", - pathB: "/foo/bar/baz", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/foo", - wantIsSubpath: false, - }, - { - pathA: "/foobar", - pathB: "/foo", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/foobar", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/foobar", - wantIsSubpath: false, - }, - { - pathA: "/", - pathB: "/foo", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/", - wantIsSubpath: true, - wantRelpath: "foo", - }, - { - pathA: "/foo/bar/../bar", - pathB: "/foo", - wantIsSubpath: true, - wantRelpath: "bar", - }, - { - pathA: "/foo/bar", - pathB: "/foo/../foo", - wantIsSubpath: true, - wantRelpath: "bar", - }, - } - - for _, tc := range tcs { - gotRelpath, gotIsSubpath := IsSubpath(tc.pathA, tc.pathB) - if gotRelpath != tc.wantRelpath || gotIsSubpath != tc.wantIsSubpath { - t.Errorf("IsSubpath(%q, %q) got %q %t, want %q %t", tc.pathA, tc.pathB, gotRelpath, gotIsSubpath, tc.wantRelpath, tc.wantIsSubpath) - } - } -} diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD deleted file mode 100644 index 70ed854a8..000000000 --- a/pkg/sentry/fs/proc/BUILD +++ /dev/null @@ -1,73 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "proc", - srcs = [ - "cgroup.go", - "cpuinfo.go", - "exec_args.go", - "fds.go", - "filesystems.go", - "fs.go", - "inode.go", - "loadavg.go", - "meminfo.go", - "mounts.go", - "net.go", - "proc.go", - "rpcinet_proc.go", - "stat.go", - "sys.go", - "sys_net.go", - "sys_net_state.go", - "task.go", - "uid_gid_map.go", - "uptime.go", - "version.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/log", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/proc/device", - "//pkg/sentry/fs/proc/seqfile", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/mm", - "//pkg/sentry/socket", - "//pkg/sentry/socket/rpcinet", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "proc_test", - size = "small", - srcs = [ - "net_test.go", - "sys_net_test.go", - ], - embed = [":proc"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/inet", - "//pkg/sentry/usermem", - ], -) diff --git a/pkg/sentry/fs/proc/README.md b/pkg/sentry/fs/proc/README.md deleted file mode 100644 index 5d4ec6c7b..000000000 --- a/pkg/sentry/fs/proc/README.md +++ /dev/null @@ -1,332 +0,0 @@ -This document tracks what is implemented in procfs. Refer to -Documentation/filesystems/proc.txt in the Linux project for information about -procfs generally. - -**NOTE**: This document is not guaranteed to be up to date. If you find an -inconsistency, please file a bug. - -[TOC] - -## Kernel data - -The following files are implemented: - -| File /proc/ | Content | -| :------------------------ | :---------------------------------------------------- | -| [cpuinfo](#cpuinfo) | Info about the CPU | -| [filesystems](#filesystems) | Supported filesystems | -| [loadavg](#loadavg) | Load average of last 1, 5 & 15 minutes | -| [meminfo](#meminfo) | Overall memory info | -| [stat](#stat) | Overall kernel statistics | -| [sys](#sys) | Change parameters within the kernel | -| [uptime](#uptime) | Wall clock since boot, combined idle time of all cpus | -| [version](#version) | Kernel version | - -### cpuinfo - -```bash -$ cat /proc/cpuinfo -processor : 0 -vendor_id : GenuineIntel -cpu family : 6 -model : 45 -model name : unknown -stepping : unknown -cpu MHz : 1234.588 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx xsaveopt -bogomips : 1234.59 -clflush size : 64 -cache_alignment : 64 -address sizes : 46 bits physical, 48 bits virtual -power management: - -... -``` - -Notable divergences: - -Field name | Notes -:--------------- | :--------------------------------------- -model name | Always unknown -stepping | Always unknown -fpu | Always yes -fpu_exception | Always yes -wp | Always yes -bogomips | Bogus value (matches cpu MHz) -clflush size | Always 64 -cache_alignment | Always 64 -address sizes | Always 46 bits physical, 48 bits virtual -power management | Always blank - -Otherwise fields are derived from the sentry configuration. - -### filesystems - -```bash -$ cat /proc/filesystems -nodev 9p -nodev devpts -nodev devtmpfs -nodev proc -nodev sysfs -nodev tmpfs -``` - -### loadavg - -```bash -$ cat /proc/loadavg -0.00 0.00 0.00 0/0 0 -``` - -Column | Notes -:------------------------------------ | :---------- -CPU.IO utilization in last 1 minute | Always zero -CPU.IO utilization in last 5 minutes | Always zero -CPU.IO utilization in last 10 minutes | Always zero -Num currently running processes | Always zero -Total num processes | Always zero - -TODO(b/62345059): Populate the columns with accurate statistics. - -### meminfo - -```bash -$ cat /proc/meminfo -MemTotal: 2097152 kB -MemFree: 2083540 kB -MemAvailable: 2083540 kB -Buffers: 0 kB -Cached: 4428 kB -SwapCache: 0 kB -Active: 10812 kB -Inactive: 2216 kB -Active(anon): 8600 kB -Inactive(anon): 0 kB -Active(file): 2212 kB -Inactive(file): 2216 kB -Unevictable: 0 kB -Mlocked: 0 kB -SwapTotal: 0 kB -SwapFree: 0 kB -Dirty: 0 kB -Writeback: 0 kB -AnonPages: 8600 kB -Mapped: 4428 kB -Shmem: 0 kB - -``` - -Notable divergences: - -Field name | Notes -:---------------- | :----------------------------------------------------- -Buffers | Always zero, no block devices -SwapCache | Always zero, no swap -Inactive(anon) | Always zero, see SwapCache -Unevictable | Always zero TODO(b/31823263) -Mlocked | Always zero TODO(b/31823263) -SwapTotal | Always zero, no swap -SwapFree | Always zero, no swap -Dirty | Always zero TODO(b/31823263) -Writeback | Always zero TODO(b/31823263) -MemAvailable | Uses the same value as MemFree since there is no swap. -Slab | Missing -SReclaimable | Missing -SUnreclaim | Missing -KernelStack | Missing -PageTables | Missing -NFS_Unstable | Missing -Bounce | Missing -WritebackTmp | Missing -CommitLimit | Missing -Committed_AS | Missing -VmallocTotal | Missing -VmallocUsed | Missing -VmallocChunk | Missing -HardwareCorrupted | Missing -AnonHugePages | Missing -ShmemHugePages | Missing -ShmemPmdMapped | Missing -HugePages_Total | Missing -HugePages_Free | Missing -HugePages_Rsvd | Missing -HugePages_Surp | Missing -Hugepagesize | Missing -DirectMap4k | Missing -DirectMap2M | Missing -DirectMap1G | Missing - -### stat - -```bash -$ cat /proc/stat -cpu 0 0 0 0 0 0 0 0 0 0 -cpu0 0 0 0 0 0 0 0 0 0 0 -cpu1 0 0 0 0 0 0 0 0 0 0 -cpu2 0 0 0 0 0 0 0 0 0 0 -cpu3 0 0 0 0 0 0 0 0 0 0 -cpu4 0 0 0 0 0 0 0 0 0 0 -cpu5 0 0 0 0 0 0 0 0 0 0 -cpu6 0 0 0 0 0 0 0 0 0 0 -cpu7 0 0 0 0 0 0 0 0 0 0 -intr 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -ctxt 0 -btime 1504040968 -processes 0 -procs_running 0 -procs_blokkcked 0 -softirq 0 0 0 0 0 0 0 0 0 0 0 -``` - -All fields except for `btime` are always zero. - -TODO(b/37226836): Populate with accurate fields. - -### sys - -```bash -$ ls /proc/sys -kernel vm -``` - -Directory | Notes -:-------- | :---------------------------- -abi | Missing -debug | Missing -dev | Missing -fs | Missing -kernel | Contains hostname (only) -net | Missing -user | Missing -vm | Contains mmap_min_addr (only) - -### uptime - -```bash -$ cat /proc/uptime -3204.62 0.00 -``` - -Column | Notes -:------------------------------- | :---------------------------- -Total num seconds system running | Time since procfs was mounted -Number of seconds idle | Always zero - -### version - -```bash -$ cat /proc/version -Linux version 4.4 #1 SMP Sun Jan 10 15:06:54 PST 2016 -``` - -## Process-specific data - -The following files are implemented: - -File /proc/PID | Content -:---------------------- | :--------------------------------------------------- -[auxv](#auxv) | Copy of auxiliary vector for the process -[cmdline](#cmdline) | Command line arguments -[comm](#comm) | Command name associated with the process -[environ](#environ) | Process environment -[exe](#exe) | Symlink to the process's executable -[fd](#fd) | Directory containing links to open file descriptors -[fdinfo](#fdinfo) | Information associated with open file descriptors -[gid_map](#gid_map) | Mappings for group IDs inside the user namespace -[io](#io) | IO statistics -[maps](#maps) | Memory mappings (anon, executables, library files) -[mounts](#mounts) | Mounted filesystems -[mountinfo](#mountinfo) | Information about mounts -[ns](#ns) | Directory containing info about supported namespaces -[stat](#stat) | Process statistics -[statm](#statm) | Process memory statistics -[status](#status) | Process status in human readable format -[task](#task) | Directory containing info about running threads -[uid_map](#uid_map) | Mappings for user IDs inside the user namespace - -### auxv - -TODO - -### cmdline - -TODO - -### comm - -TODO - -### environment - -TODO - -### exe - -TODO - -### fd - -TODO - -### fdinfo - -TODO - -### gid_map - -TODO - -### io - -Only has data for rchar, wchar, syscr, and syscw. - -TODO: add more detail. - -### maps - -TODO - -### mounts - -TODO - -### mountinfo - -TODO - -### ns - -TODO - -### stat - -Only has data for pid, comm, state, ppid, utime, stime, cutime, cstime, -num_threads, and exit_signal. - -TODO: add more detail. - -### statm - -Only has data for vss and rss. - -TODO: add more detail. - -### status - -Contains data for Name, State, Tgid, Pid, Ppid, TracerPid, FDSize, VmSize, -VmRSS, Threads, CapInh, CapPrm, CapEff, CapBnd, Seccomp. - -TODO: add more detail. - -### task - -TODO - -### uid_map - -TODO diff --git a/pkg/sentry/fs/proc/device/BUILD b/pkg/sentry/fs/proc/device/BUILD deleted file mode 100644 index 0394451d4..000000000 --- a/pkg/sentry/fs/proc/device/BUILD +++ /dev/null @@ -1,11 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "device", - srcs = ["device.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc/device", - visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/sentry/device"], -) diff --git a/pkg/sentry/fs/proc/device/device_state_autogen.go b/pkg/sentry/fs/proc/device/device_state_autogen.go new file mode 100755 index 000000000..be407ac45 --- /dev/null +++ b/pkg/sentry/fs/proc/device/device_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package device + diff --git a/pkg/sentry/fs/proc/net_test.go b/pkg/sentry/fs/proc/net_test.go deleted file mode 100644 index f18681405..000000000 --- a/pkg/sentry/fs/proc/net_test.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/inet" -) - -func newIPv6TestStack() *inet.TestStack { - s := inet.NewTestStack() - s.SupportsIPv6Flag = true - return s -} - -func TestIfinet6NoAddresses(t *testing.T) { - n := &ifinet6{s: newIPv6TestStack()} - if got := n.contents(); got != nil { - t.Errorf("Got n.contents() = %v, want = %v", got, nil) - } -} - -func TestIfinet6(t *testing.T) { - s := newIPv6TestStack() - s.InterfacesMap[1] = inet.Interface{Name: "eth0"} - s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - }, - } - s.InterfacesMap[2] = inet.Interface{Name: "eth1"} - s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"), - }, - } - want := map[string]struct{}{ - "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {}, - "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {}, - } - - n := &ifinet6{s: s} - contents := n.contents() - if len(contents) != len(want) { - t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want)) - } - got := map[string]struct{}{} - for _, l := range contents { - got[l] = struct{}{} - } - - if !reflect.DeepEqual(got, want) { - t.Errorf("Got n.contents() = %v, want = %v", got, want) - } -} diff --git a/pkg/sentry/fs/proc/proc_state_autogen.go b/pkg/sentry/fs/proc/proc_state_autogen.go new file mode 100755 index 000000000..2fe57901a --- /dev/null +++ b/pkg/sentry/fs/proc/proc_state_autogen.go @@ -0,0 +1,669 @@ +// automatically generated by stateify. + +package proc + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *execArgInode) beforeSave() {} +func (x *execArgInode) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("arg", &x.arg) + m.Save("t", &x.t) +} + +func (x *execArgInode) afterLoad() {} +func (x *execArgInode) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("arg", &x.arg) + m.Load("t", &x.t) +} + +func (x *execArgFile) beforeSave() {} +func (x *execArgFile) save(m state.Map) { + x.beforeSave() + m.Save("arg", &x.arg) + m.Save("t", &x.t) +} + +func (x *execArgFile) afterLoad() {} +func (x *execArgFile) load(m state.Map) { + m.Load("arg", &x.arg) + m.Load("t", &x.t) +} + +func (x *fdDir) beforeSave() {} +func (x *fdDir) save(m state.Map) { + x.beforeSave() + m.Save("Dir", &x.Dir) + m.Save("t", &x.t) +} + +func (x *fdDir) afterLoad() {} +func (x *fdDir) load(m state.Map) { + m.Load("Dir", &x.Dir) + m.Load("t", &x.t) +} + +func (x *fdDirFile) beforeSave() {} +func (x *fdDirFile) save(m state.Map) { + x.beforeSave() + m.Save("isInfoFile", &x.isInfoFile) + m.Save("t", &x.t) +} + +func (x *fdDirFile) afterLoad() {} +func (x *fdDirFile) load(m state.Map) { + m.Load("isInfoFile", &x.isInfoFile) + m.Load("t", &x.t) +} + +func (x *fdInfoDir) beforeSave() {} +func (x *fdInfoDir) save(m state.Map) { + x.beforeSave() + m.Save("Dir", &x.Dir) + m.Save("t", &x.t) +} + +func (x *fdInfoDir) afterLoad() {} +func (x *fdInfoDir) load(m state.Map) { + m.Load("Dir", &x.Dir) + m.Load("t", &x.t) +} + +func (x *filesystemsData) beforeSave() {} +func (x *filesystemsData) save(m state.Map) { + x.beforeSave() +} + +func (x *filesystemsData) afterLoad() {} +func (x *filesystemsData) load(m state.Map) { +} + +func (x *filesystem) beforeSave() {} +func (x *filesystem) save(m state.Map) { + x.beforeSave() +} + +func (x *filesystem) afterLoad() {} +func (x *filesystem) load(m state.Map) { +} + +func (x *taskOwnedInodeOps) beforeSave() {} +func (x *taskOwnedInodeOps) save(m state.Map) { + x.beforeSave() + m.Save("InodeOperations", &x.InodeOperations) + m.Save("t", &x.t) +} + +func (x *taskOwnedInodeOps) afterLoad() {} +func (x *taskOwnedInodeOps) load(m state.Map) { + m.Load("InodeOperations", &x.InodeOperations) + m.Load("t", &x.t) +} + +func (x *staticFileInodeOps) beforeSave() {} +func (x *staticFileInodeOps) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("InodeStaticFileGetter", &x.InodeStaticFileGetter) +} + +func (x *staticFileInodeOps) afterLoad() {} +func (x *staticFileInodeOps) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("InodeStaticFileGetter", &x.InodeStaticFileGetter) +} + +func (x *loadavgData) beforeSave() {} +func (x *loadavgData) save(m state.Map) { + x.beforeSave() +} + +func (x *loadavgData) afterLoad() {} +func (x *loadavgData) load(m state.Map) { +} + +func (x *meminfoData) beforeSave() {} +func (x *meminfoData) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *meminfoData) afterLoad() {} +func (x *meminfoData) load(m state.Map) { + m.Load("k", &x.k) +} + +func (x *mountInfoFile) beforeSave() {} +func (x *mountInfoFile) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *mountInfoFile) afterLoad() {} +func (x *mountInfoFile) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *mountsFile) beforeSave() {} +func (x *mountsFile) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *mountsFile) afterLoad() {} +func (x *mountsFile) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *ifinet6) beforeSave() {} +func (x *ifinet6) save(m state.Map) { + x.beforeSave() + m.Save("s", &x.s) +} + +func (x *ifinet6) afterLoad() {} +func (x *ifinet6) load(m state.Map) { + m.Load("s", &x.s) +} + +func (x *netDev) beforeSave() {} +func (x *netDev) save(m state.Map) { + x.beforeSave() + m.Save("s", &x.s) +} + +func (x *netDev) afterLoad() {} +func (x *netDev) load(m state.Map) { + m.Load("s", &x.s) +} + +func (x *netUnix) beforeSave() {} +func (x *netUnix) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *netUnix) afterLoad() {} +func (x *netUnix) load(m state.Map) { + m.Load("k", &x.k) +} + +func (x *netTCP) beforeSave() {} +func (x *netTCP) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *netTCP) afterLoad() {} +func (x *netTCP) load(m state.Map) { + m.Load("k", &x.k) +} + +func (x *proc) beforeSave() {} +func (x *proc) save(m state.Map) { + x.beforeSave() + m.Save("Dir", &x.Dir) + m.Save("k", &x.k) + m.Save("pidns", &x.pidns) + m.Save("cgroupControllers", &x.cgroupControllers) +} + +func (x *proc) afterLoad() {} +func (x *proc) load(m state.Map) { + m.Load("Dir", &x.Dir) + m.Load("k", &x.k) + m.Load("pidns", &x.pidns) + m.Load("cgroupControllers", &x.cgroupControllers) +} + +func (x *self) beforeSave() {} +func (x *self) save(m state.Map) { + x.beforeSave() + m.Save("Symlink", &x.Symlink) + m.Save("pidns", &x.pidns) +} + +func (x *self) afterLoad() {} +func (x *self) load(m state.Map) { + m.Load("Symlink", &x.Symlink) + m.Load("pidns", &x.pidns) +} + +func (x *threadSelf) beforeSave() {} +func (x *threadSelf) save(m state.Map) { + x.beforeSave() + m.Save("Symlink", &x.Symlink) + m.Save("pidns", &x.pidns) +} + +func (x *threadSelf) afterLoad() {} +func (x *threadSelf) load(m state.Map) { + m.Load("Symlink", &x.Symlink) + m.Load("pidns", &x.pidns) +} + +func (x *rootProcFile) beforeSave() {} +func (x *rootProcFile) save(m state.Map) { + x.beforeSave() + m.Save("iops", &x.iops) +} + +func (x *rootProcFile) afterLoad() {} +func (x *rootProcFile) load(m state.Map) { + m.Load("iops", &x.iops) +} + +func (x *statData) beforeSave() {} +func (x *statData) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *statData) afterLoad() {} +func (x *statData) load(m state.Map) { + m.Load("k", &x.k) +} + +func (x *mmapMinAddrData) beforeSave() {} +func (x *mmapMinAddrData) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *mmapMinAddrData) afterLoad() {} +func (x *mmapMinAddrData) load(m state.Map) { + m.Load("k", &x.k) +} + +func (x *overcommitMemory) beforeSave() {} +func (x *overcommitMemory) save(m state.Map) { + x.beforeSave() +} + +func (x *overcommitMemory) afterLoad() {} +func (x *overcommitMemory) load(m state.Map) { +} + +func (x *hostname) beforeSave() {} +func (x *hostname) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) +} + +func (x *hostname) afterLoad() {} +func (x *hostname) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) +} + +func (x *hostnameFile) beforeSave() {} +func (x *hostnameFile) save(m state.Map) { + x.beforeSave() +} + +func (x *hostnameFile) afterLoad() {} +func (x *hostnameFile) load(m state.Map) { +} + +func (x *tcpMemInode) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("dir", &x.dir) + m.Save("s", &x.s) + m.Save("size", &x.size) +} + +func (x *tcpMemInode) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("dir", &x.dir) + m.LoadWait("s", &x.s) + m.Load("size", &x.size) + m.AfterLoad(x.afterLoad) +} + +func (x *tcpMemFile) beforeSave() {} +func (x *tcpMemFile) save(m state.Map) { + x.beforeSave() + m.Save("tcpMemInode", &x.tcpMemInode) +} + +func (x *tcpMemFile) afterLoad() {} +func (x *tcpMemFile) load(m state.Map) { + m.Load("tcpMemInode", &x.tcpMemInode) +} + +func (x *tcpSack) beforeSave() {} +func (x *tcpSack) save(m state.Map) { + x.beforeSave() + m.Save("stack", &x.stack) + m.Save("enabled", &x.enabled) + m.Save("SimpleFileInode", &x.SimpleFileInode) +} + +func (x *tcpSack) load(m state.Map) { + m.LoadWait("stack", &x.stack) + m.Load("enabled", &x.enabled) + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.AfterLoad(x.afterLoad) +} + +func (x *tcpSackFile) beforeSave() {} +func (x *tcpSackFile) save(m state.Map) { + x.beforeSave() + m.Save("tcpSack", &x.tcpSack) + m.Save("stack", &x.stack) +} + +func (x *tcpSackFile) afterLoad() {} +func (x *tcpSackFile) load(m state.Map) { + m.Load("tcpSack", &x.tcpSack) + m.LoadWait("stack", &x.stack) +} + +func (x *taskDir) beforeSave() {} +func (x *taskDir) save(m state.Map) { + x.beforeSave() + m.Save("Dir", &x.Dir) + m.Save("t", &x.t) + m.Save("pidns", &x.pidns) +} + +func (x *taskDir) afterLoad() {} +func (x *taskDir) load(m state.Map) { + m.Load("Dir", &x.Dir) + m.Load("t", &x.t) + m.Load("pidns", &x.pidns) +} + +func (x *subtasks) beforeSave() {} +func (x *subtasks) save(m state.Map) { + x.beforeSave() + m.Save("Dir", &x.Dir) + m.Save("t", &x.t) + m.Save("p", &x.p) +} + +func (x *subtasks) afterLoad() {} +func (x *subtasks) load(m state.Map) { + m.Load("Dir", &x.Dir) + m.Load("t", &x.t) + m.Load("p", &x.p) +} + +func (x *subtasksFile) beforeSave() {} +func (x *subtasksFile) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) + m.Save("pidns", &x.pidns) +} + +func (x *subtasksFile) afterLoad() {} +func (x *subtasksFile) load(m state.Map) { + m.Load("t", &x.t) + m.Load("pidns", &x.pidns) +} + +func (x *exe) beforeSave() {} +func (x *exe) save(m state.Map) { + x.beforeSave() + m.Save("Symlink", &x.Symlink) + m.Save("t", &x.t) +} + +func (x *exe) afterLoad() {} +func (x *exe) load(m state.Map) { + m.Load("Symlink", &x.Symlink) + m.Load("t", &x.t) +} + +func (x *namespaceSymlink) beforeSave() {} +func (x *namespaceSymlink) save(m state.Map) { + x.beforeSave() + m.Save("Symlink", &x.Symlink) + m.Save("t", &x.t) +} + +func (x *namespaceSymlink) afterLoad() {} +func (x *namespaceSymlink) load(m state.Map) { + m.Load("Symlink", &x.Symlink) + m.Load("t", &x.t) +} + +func (x *mapsData) beforeSave() {} +func (x *mapsData) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *mapsData) afterLoad() {} +func (x *mapsData) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *smapsData) beforeSave() {} +func (x *smapsData) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *smapsData) afterLoad() {} +func (x *smapsData) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *taskStatData) beforeSave() {} +func (x *taskStatData) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) + m.Save("tgstats", &x.tgstats) + m.Save("pidns", &x.pidns) +} + +func (x *taskStatData) afterLoad() {} +func (x *taskStatData) load(m state.Map) { + m.Load("t", &x.t) + m.Load("tgstats", &x.tgstats) + m.Load("pidns", &x.pidns) +} + +func (x *statmData) beforeSave() {} +func (x *statmData) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *statmData) afterLoad() {} +func (x *statmData) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *statusData) beforeSave() {} +func (x *statusData) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) + m.Save("pidns", &x.pidns) +} + +func (x *statusData) afterLoad() {} +func (x *statusData) load(m state.Map) { + m.Load("t", &x.t) + m.Load("pidns", &x.pidns) +} + +func (x *ioData) beforeSave() {} +func (x *ioData) save(m state.Map) { + x.beforeSave() + m.Save("ioUsage", &x.ioUsage) +} + +func (x *ioData) afterLoad() {} +func (x *ioData) load(m state.Map) { + m.Load("ioUsage", &x.ioUsage) +} + +func (x *comm) beforeSave() {} +func (x *comm) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("t", &x.t) +} + +func (x *comm) afterLoad() {} +func (x *comm) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("t", &x.t) +} + +func (x *commFile) beforeSave() {} +func (x *commFile) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *commFile) afterLoad() {} +func (x *commFile) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *auxvec) beforeSave() {} +func (x *auxvec) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("t", &x.t) +} + +func (x *auxvec) afterLoad() {} +func (x *auxvec) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("t", &x.t) +} + +func (x *auxvecFile) beforeSave() {} +func (x *auxvecFile) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *auxvecFile) afterLoad() {} +func (x *auxvecFile) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *idMapInodeOperations) beforeSave() {} +func (x *idMapInodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Save("t", &x.t) + m.Save("gids", &x.gids) +} + +func (x *idMapInodeOperations) afterLoad() {} +func (x *idMapInodeOperations) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Load("t", &x.t) + m.Load("gids", &x.gids) +} + +func (x *idMapFileOperations) beforeSave() {} +func (x *idMapFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("iops", &x.iops) +} + +func (x *idMapFileOperations) afterLoad() {} +func (x *idMapFileOperations) load(m state.Map) { + m.Load("iops", &x.iops) +} + +func (x *uptime) beforeSave() {} +func (x *uptime) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("startTime", &x.startTime) +} + +func (x *uptime) afterLoad() {} +func (x *uptime) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("startTime", &x.startTime) +} + +func (x *uptimeFile) beforeSave() {} +func (x *uptimeFile) save(m state.Map) { + x.beforeSave() + m.Save("startTime", &x.startTime) +} + +func (x *uptimeFile) afterLoad() {} +func (x *uptimeFile) load(m state.Map) { + m.Load("startTime", &x.startTime) +} + +func (x *versionData) beforeSave() {} +func (x *versionData) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *versionData) afterLoad() {} +func (x *versionData) load(m state.Map) { + m.Load("k", &x.k) +} + +func init() { + state.Register("proc.execArgInode", (*execArgInode)(nil), state.Fns{Save: (*execArgInode).save, Load: (*execArgInode).load}) + state.Register("proc.execArgFile", (*execArgFile)(nil), state.Fns{Save: (*execArgFile).save, Load: (*execArgFile).load}) + state.Register("proc.fdDir", (*fdDir)(nil), state.Fns{Save: (*fdDir).save, Load: (*fdDir).load}) + state.Register("proc.fdDirFile", (*fdDirFile)(nil), state.Fns{Save: (*fdDirFile).save, Load: (*fdDirFile).load}) + state.Register("proc.fdInfoDir", (*fdInfoDir)(nil), state.Fns{Save: (*fdInfoDir).save, Load: (*fdInfoDir).load}) + state.Register("proc.filesystemsData", (*filesystemsData)(nil), state.Fns{Save: (*filesystemsData).save, Load: (*filesystemsData).load}) + state.Register("proc.filesystem", (*filesystem)(nil), state.Fns{Save: (*filesystem).save, Load: (*filesystem).load}) + state.Register("proc.taskOwnedInodeOps", (*taskOwnedInodeOps)(nil), state.Fns{Save: (*taskOwnedInodeOps).save, Load: (*taskOwnedInodeOps).load}) + state.Register("proc.staticFileInodeOps", (*staticFileInodeOps)(nil), state.Fns{Save: (*staticFileInodeOps).save, Load: (*staticFileInodeOps).load}) + state.Register("proc.loadavgData", (*loadavgData)(nil), state.Fns{Save: (*loadavgData).save, Load: (*loadavgData).load}) + state.Register("proc.meminfoData", (*meminfoData)(nil), state.Fns{Save: (*meminfoData).save, Load: (*meminfoData).load}) + state.Register("proc.mountInfoFile", (*mountInfoFile)(nil), state.Fns{Save: (*mountInfoFile).save, Load: (*mountInfoFile).load}) + state.Register("proc.mountsFile", (*mountsFile)(nil), state.Fns{Save: (*mountsFile).save, Load: (*mountsFile).load}) + state.Register("proc.ifinet6", (*ifinet6)(nil), state.Fns{Save: (*ifinet6).save, Load: (*ifinet6).load}) + state.Register("proc.netDev", (*netDev)(nil), state.Fns{Save: (*netDev).save, Load: (*netDev).load}) + state.Register("proc.netUnix", (*netUnix)(nil), state.Fns{Save: (*netUnix).save, Load: (*netUnix).load}) + state.Register("proc.netTCP", (*netTCP)(nil), state.Fns{Save: (*netTCP).save, Load: (*netTCP).load}) + state.Register("proc.proc", (*proc)(nil), state.Fns{Save: (*proc).save, Load: (*proc).load}) + state.Register("proc.self", (*self)(nil), state.Fns{Save: (*self).save, Load: (*self).load}) + state.Register("proc.threadSelf", (*threadSelf)(nil), state.Fns{Save: (*threadSelf).save, Load: (*threadSelf).load}) + state.Register("proc.rootProcFile", (*rootProcFile)(nil), state.Fns{Save: (*rootProcFile).save, Load: (*rootProcFile).load}) + state.Register("proc.statData", (*statData)(nil), state.Fns{Save: (*statData).save, Load: (*statData).load}) + state.Register("proc.mmapMinAddrData", (*mmapMinAddrData)(nil), state.Fns{Save: (*mmapMinAddrData).save, Load: (*mmapMinAddrData).load}) + state.Register("proc.overcommitMemory", (*overcommitMemory)(nil), state.Fns{Save: (*overcommitMemory).save, Load: (*overcommitMemory).load}) + state.Register("proc.hostname", (*hostname)(nil), state.Fns{Save: (*hostname).save, Load: (*hostname).load}) + state.Register("proc.hostnameFile", (*hostnameFile)(nil), state.Fns{Save: (*hostnameFile).save, Load: (*hostnameFile).load}) + state.Register("proc.tcpMemInode", (*tcpMemInode)(nil), state.Fns{Save: (*tcpMemInode).save, Load: (*tcpMemInode).load}) + state.Register("proc.tcpMemFile", (*tcpMemFile)(nil), state.Fns{Save: (*tcpMemFile).save, Load: (*tcpMemFile).load}) + state.Register("proc.tcpSack", (*tcpSack)(nil), state.Fns{Save: (*tcpSack).save, Load: (*tcpSack).load}) + state.Register("proc.tcpSackFile", (*tcpSackFile)(nil), state.Fns{Save: (*tcpSackFile).save, Load: (*tcpSackFile).load}) + state.Register("proc.taskDir", (*taskDir)(nil), state.Fns{Save: (*taskDir).save, Load: (*taskDir).load}) + state.Register("proc.subtasks", (*subtasks)(nil), state.Fns{Save: (*subtasks).save, Load: (*subtasks).load}) + state.Register("proc.subtasksFile", (*subtasksFile)(nil), state.Fns{Save: (*subtasksFile).save, Load: (*subtasksFile).load}) + state.Register("proc.exe", (*exe)(nil), state.Fns{Save: (*exe).save, Load: (*exe).load}) + state.Register("proc.namespaceSymlink", (*namespaceSymlink)(nil), state.Fns{Save: (*namespaceSymlink).save, Load: (*namespaceSymlink).load}) + state.Register("proc.mapsData", (*mapsData)(nil), state.Fns{Save: (*mapsData).save, Load: (*mapsData).load}) + state.Register("proc.smapsData", (*smapsData)(nil), state.Fns{Save: (*smapsData).save, Load: (*smapsData).load}) + state.Register("proc.taskStatData", (*taskStatData)(nil), state.Fns{Save: (*taskStatData).save, Load: (*taskStatData).load}) + state.Register("proc.statmData", (*statmData)(nil), state.Fns{Save: (*statmData).save, Load: (*statmData).load}) + state.Register("proc.statusData", (*statusData)(nil), state.Fns{Save: (*statusData).save, Load: (*statusData).load}) + state.Register("proc.ioData", (*ioData)(nil), state.Fns{Save: (*ioData).save, Load: (*ioData).load}) + state.Register("proc.comm", (*comm)(nil), state.Fns{Save: (*comm).save, Load: (*comm).load}) + state.Register("proc.commFile", (*commFile)(nil), state.Fns{Save: (*commFile).save, Load: (*commFile).load}) + state.Register("proc.auxvec", (*auxvec)(nil), state.Fns{Save: (*auxvec).save, Load: (*auxvec).load}) + state.Register("proc.auxvecFile", (*auxvecFile)(nil), state.Fns{Save: (*auxvecFile).save, Load: (*auxvecFile).load}) + state.Register("proc.idMapInodeOperations", (*idMapInodeOperations)(nil), state.Fns{Save: (*idMapInodeOperations).save, Load: (*idMapInodeOperations).load}) + state.Register("proc.idMapFileOperations", (*idMapFileOperations)(nil), state.Fns{Save: (*idMapFileOperations).save, Load: (*idMapFileOperations).load}) + state.Register("proc.uptime", (*uptime)(nil), state.Fns{Save: (*uptime).save, Load: (*uptime).load}) + state.Register("proc.uptimeFile", (*uptimeFile)(nil), state.Fns{Save: (*uptimeFile).save, Load: (*uptimeFile).load}) + state.Register("proc.versionData", (*versionData)(nil), state.Fns{Save: (*versionData).save, Load: (*versionData).load}) +} diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD deleted file mode 100644 index 20c3eefc8..000000000 --- a/pkg/sentry/fs/proc/seqfile/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "seqfile", - srcs = ["seqfile.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/proc/device", - "//pkg/sentry/kernel/time", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "seqfile_test", - size = "small", - srcs = ["seqfile_test.go"], - embed = [":seqfile"], - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/usermem", - ], -) diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go b/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go new file mode 100755 index 000000000..db9f7ceb9 --- /dev/null +++ b/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go @@ -0,0 +1,58 @@ +// automatically generated by stateify. + +package seqfile + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *SeqData) beforeSave() {} +func (x *SeqData) save(m state.Map) { + x.beforeSave() + m.Save("Buf", &x.Buf) + m.Save("Handle", &x.Handle) +} + +func (x *SeqData) afterLoad() {} +func (x *SeqData) load(m state.Map) { + m.Load("Buf", &x.Buf) + m.Load("Handle", &x.Handle) +} + +func (x *SeqFile) beforeSave() {} +func (x *SeqFile) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("SeqSource", &x.SeqSource) + m.Save("source", &x.source) + m.Save("generation", &x.generation) + m.Save("lastRead", &x.lastRead) +} + +func (x *SeqFile) afterLoad() {} +func (x *SeqFile) load(m state.Map) { + m.Load("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("SeqSource", &x.SeqSource) + m.Load("source", &x.source) + m.Load("generation", &x.generation) + m.Load("lastRead", &x.lastRead) +} + +func (x *seqFileOperations) beforeSave() {} +func (x *seqFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("seqFile", &x.seqFile) +} + +func (x *seqFileOperations) afterLoad() {} +func (x *seqFileOperations) load(m state.Map) { + m.Load("seqFile", &x.seqFile) +} + +func init() { + state.Register("seqfile.SeqData", (*SeqData)(nil), state.Fns{Save: (*SeqData).save, Load: (*SeqData).load}) + state.Register("seqfile.SeqFile", (*SeqFile)(nil), state.Fns{Save: (*SeqFile).save, Load: (*SeqFile).load}) + state.Register("seqfile.seqFileOperations", (*seqFileOperations)(nil), state.Fns{Save: (*seqFileOperations).save, Load: (*seqFileOperations).load}) +} diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_test.go b/pkg/sentry/fs/proc/seqfile/seqfile_test.go deleted file mode 100644 index ebfeee835..000000000 --- a/pkg/sentry/fs/proc/seqfile/seqfile_test.go +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package seqfile - -import ( - "bytes" - "fmt" - "io" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -type seqTest struct { - actual []SeqData - update bool -} - -func (s *seqTest) Init() { - var sq []SeqData - // Create some SeqData. - for i := 0; i < 10; i++ { - var b []byte - for j := 0; j < 10; j++ { - b = append(b, byte(i)) - } - sq = append(sq, SeqData{ - Buf: b, - Handle: &testHandle{i: i}, - }) - } - s.actual = sq -} - -// NeedsUpdate reports whether we need to update the data we've previously read. -func (s *seqTest) NeedsUpdate(int64) bool { - return s.update -} - -// ReadSeqFiledata returns a slice of SeqData which contains elements -// greater than the handle. -func (s *seqTest) ReadSeqFileData(ctx context.Context, handle SeqHandle) ([]SeqData, int64) { - if handle == nil { - return s.actual, 0 - } - h := *handle.(*testHandle) - var ret []SeqData - for _, b := range s.actual { - // We want the next one. - h2 := *b.Handle.(*testHandle) - if h2.i > h.i { - ret = append(ret, b) - } - } - return ret, 0 -} - -// Flatten a slice of slices into one slice. -func flatten(buf ...[]byte) []byte { - var flat []byte - for _, b := range buf { - flat = append(flat, b...) - } - return flat -} - -type testHandle struct { - i int -} - -type testTable struct { - offset int64 - readBufferSize int - expectedData []byte - expectedError error -} - -func runTableTests(ctx context.Context, table []testTable, dirent *fs.Dirent) error { - for _, tt := range table { - file, err := dirent.Inode.InodeOperations.GetFile(ctx, dirent, fs.FileFlags{Read: true}) - if err != nil { - return fmt.Errorf("GetFile returned error: %v", err) - } - - data := make([]byte, tt.readBufferSize) - resultLen, err := file.Preadv(ctx, usermem.BytesIOSequence(data), tt.offset) - if err != tt.expectedError { - return fmt.Errorf("t.Preadv(len: %v, offset: %v) (error) => %v expected %v", tt.readBufferSize, tt.offset, err, tt.expectedError) - } - expectedLen := int64(len(tt.expectedData)) - if resultLen != expectedLen { - // We make this just an error so we wall through and print the data below. - return fmt.Errorf("t.Preadv(len: %v, offset: %v) (size) => %v expected %v", tt.readBufferSize, tt.offset, resultLen, expectedLen) - } - if !bytes.Equal(data[:expectedLen], tt.expectedData) { - return fmt.Errorf("t.Preadv(len: %v, offset: %v) (data) => %v expected %v", tt.readBufferSize, tt.offset, data[:expectedLen], tt.expectedData) - } - } - return nil -} - -func TestSeqFile(t *testing.T) { - testSource := &seqTest{} - testSource.Init() - - // Create a file that can be R/W. - ctx := contexttest.Context(t) - m := fs.NewPseudoMountSource(ctx) - contents := map[string]*fs.Inode{ - "foo": NewSeqFileInode(ctx, testSource, m), - } - root := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0777)) - - // How about opening it? - inode := fs.NewInode(ctx, root, m, fs.StableAttr{Type: fs.Directory}) - dirent2, err := root.Lookup(ctx, inode, "foo") - if err != nil { - t.Fatalf("failed to walk to foo for n2: %v", err) - } - n2 := dirent2.Inode.InodeOperations - file2, err := n2.GetFile(ctx, dirent2, fs.FileFlags{Read: true, Write: true}) - if err != nil { - t.Fatalf("GetFile returned error: %v", err) - } - - // Writing? - if _, err := file2.Writev(ctx, usermem.BytesIOSequence([]byte("test"))); err == nil { - t.Fatalf("managed to write to n2: %v", err) - } - - // How about reading? - dirent3, err := root.Lookup(ctx, inode, "foo") - if err != nil { - t.Fatalf("failed to walk to foo: %v", err) - } - n3 := dirent3.Inode.InodeOperations - if n2 != n3 { - t.Error("got n2 != n3, want same") - } - - testSource.update = true - - table := []testTable{ - // Read past the end. - {100, 4, []byte{}, io.EOF}, - {110, 4, []byte{}, io.EOF}, - {200, 4, []byte{}, io.EOF}, - // Read a truncated first line. - {0, 4, testSource.actual[0].Buf[:4], nil}, - // Read the whole first line. - {0, 10, testSource.actual[0].Buf, nil}, - // Read the whole first line + 5 bytes of second line. - {0, 15, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf[:5]), nil}, - // First 4 bytes of the second line. - {10, 4, testSource.actual[1].Buf[:4], nil}, - // Read the two first lines. - {0, 20, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf), nil}, - // Read three lines. - {0, 30, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf, testSource.actual[2].Buf), nil}, - // Read everything, but use a bigger buffer than necessary. - {0, 150, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf, testSource.actual[2].Buf, testSource.actual[3].Buf, testSource.actual[4].Buf, testSource.actual[5].Buf, testSource.actual[6].Buf, testSource.actual[7].Buf, testSource.actual[8].Buf, testSource.actual[9].Buf), nil}, - // Read the last 3 bytes. - {97, 10, testSource.actual[9].Buf[7:], nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed with testSource.update = %v : %v", testSource.update, err) - } - - // Disable updates and do it again. - testSource.update = false - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed with testSource.update = %v: %v", testSource.update, err) - } -} - -// Test that we behave correctly when the file is updated. -func TestSeqFileFileUpdated(t *testing.T) { - testSource := &seqTest{} - testSource.Init() - testSource.update = true - - // Create a file that can be R/W. - ctx := contexttest.Context(t) - m := fs.NewPseudoMountSource(ctx) - contents := map[string]*fs.Inode{ - "foo": NewSeqFileInode(ctx, testSource, m), - } - root := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0777)) - - // How about opening it? - inode := fs.NewInode(ctx, root, m, fs.StableAttr{Type: fs.Directory}) - dirent2, err := root.Lookup(ctx, inode, "foo") - if err != nil { - t.Fatalf("failed to walk to foo for dirent2: %v", err) - } - - table := []testTable{ - {0, 16, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf[:6]), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed: %v", err) - } - // Delete the first entry. - cut := testSource.actual[0].Buf - testSource.actual = testSource.actual[1:] - - table = []testTable{ - // Try reading buffer 0 with an offset. This will not delete the old data. - {1, 5, cut[1:6], nil}, - // Reset our file by reading at offset 0. - {0, 10, testSource.actual[0].Buf, nil}, - {16, 14, flatten(testSource.actual[1].Buf[6:], testSource.actual[2].Buf), nil}, - // Read the same data a second time. - {16, 14, flatten(testSource.actual[1].Buf[6:], testSource.actual[2].Buf), nil}, - // Read the following two lines. - {30, 20, flatten(testSource.actual[3].Buf, testSource.actual[4].Buf), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after removing first entry: %v", err) - } - - // Add a new duplicate line in the middle (6666...) - after := testSource.actual[5:] - testSource.actual = testSource.actual[:4] - // Note the list must be sorted. - testSource.actual = append(testSource.actual, after[0]) - testSource.actual = append(testSource.actual, after...) - - table = []testTable{ - {50, 20, flatten(testSource.actual[4].Buf, testSource.actual[5].Buf), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after adding middle entry: %v", err) - } - // This will be used in a later test. - oldTestData := testSource.actual - - // Delete everything. - testSource.actual = testSource.actual[:0] - table = []testTable{ - {20, 20, []byte{}, io.EOF}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after removing all entries: %v", err) - } - // Restore some of the data. - testSource.actual = oldTestData[:1] - table = []testTable{ - {6, 20, testSource.actual[0].Buf[6:], nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after adding first entry back: %v", err) - } - - // Re-extend the data - testSource.actual = oldTestData - table = []testTable{ - {30, 20, flatten(testSource.actual[3].Buf, testSource.actual[4].Buf), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after extending testSource: %v", err) - } -} diff --git a/pkg/sentry/fs/proc/sys_net_test.go b/pkg/sentry/fs/proc/sys_net_test.go deleted file mode 100644 index 6abae7a60..000000000 --- a/pkg/sentry/fs/proc/sys_net_test.go +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -func TestQuerySendBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - s.TCPSendBufSize = inet.TCPBufferSize{100, 200, 300} - tmi := &tcpMemInode{s: s, dir: tcpWMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - buf := make([]byte, 100) - dst := usermem.BytesIOSequence(buf) - n, err := tmf.Read(ctx, nil, dst, 0) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - - if got, want := string(buf[:n]), "100\t200\t300\n"; got != want { - t.Fatalf("Bad string: got %v, want %v", got, want) - } -} - -func TestQueryRecvBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - s.TCPRecvBufSize = inet.TCPBufferSize{100, 200, 300} - tmi := &tcpMemInode{s: s, dir: tcpRMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - buf := make([]byte, 100) - dst := usermem.BytesIOSequence(buf) - n, err := tmf.Read(ctx, nil, dst, 0) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - - if got, want := string(buf[:n]), "100\t200\t300\n"; got != want { - t.Fatalf("Bad string: got %v, want %v", got, want) - } -} - -var cases = []struct { - str string - initial inet.TCPBufferSize - final inet.TCPBufferSize -}{ - { - str: "", - initial: inet.TCPBufferSize{1, 2, 3}, - final: inet.TCPBufferSize{1, 2, 3}, - }, - { - str: "100\n", - initial: inet.TCPBufferSize{1, 100, 200}, - final: inet.TCPBufferSize{100, 100, 200}, - }, - { - str: "100 200 300\n", - initial: inet.TCPBufferSize{1, 2, 3}, - final: inet.TCPBufferSize{100, 200, 300}, - }, -} - -func TestConfigureSendBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - for _, c := range cases { - s.TCPSendBufSize = c.initial - tmi := &tcpMemInode{s: s, dir: tcpWMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - // Write the values. - src := usermem.BytesIOSequence([]byte(c.str)) - if n, err := tmf.Write(ctx, nil, src, 0); n != int64(len(c.str)) || err != nil { - t.Errorf("Write, case = %q: got (%d, %v), wanted (%d, nil)", c.str, n, err, len(c.str)) - } - - // Read the values from the stack and check them. - if s.TCPSendBufSize != c.final { - t.Errorf("TCPSendBufferSize, case = %q: got %v, wanted %v", c.str, s.TCPSendBufSize, c.final) - } - } -} - -func TestConfigureRecvBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - for _, c := range cases { - s.TCPRecvBufSize = c.initial - tmi := &tcpMemInode{s: s, dir: tcpRMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - // Write the values. - src := usermem.BytesIOSequence([]byte(c.str)) - if n, err := tmf.Write(ctx, nil, src, 0); n != int64(len(c.str)) || err != nil { - t.Errorf("Write, case = %q: got (%d, %v), wanted (%d, nil)", c.str, n, err, len(c.str)) - } - - // Read the values from the stack and check them. - if s.TCPRecvBufSize != c.final { - t.Errorf("TCPRecvBufferSize, case = %q: got %v, wanted %v", c.str, s.TCPRecvBufSize, c.final) - } - } -} diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD deleted file mode 100644 index 516efcc4c..000000000 --- a/pkg/sentry/fs/ramfs/BUILD +++ /dev/null @@ -1,37 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "ramfs", - srcs = [ - "dir.go", - "socket.go", - "symlink.go", - "tree.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ramfs", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "ramfs_test", - size = "small", - srcs = ["tree_test.go"], - embed = [":ramfs"], - deps = [ - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - ], -) diff --git a/pkg/sentry/fs/ramfs/ramfs_state_autogen.go b/pkg/sentry/fs/ramfs/ramfs_state_autogen.go new file mode 100755 index 000000000..bc86a01a1 --- /dev/null +++ b/pkg/sentry/fs/ramfs/ramfs_state_autogen.go @@ -0,0 +1,94 @@ +// automatically generated by stateify. + +package ramfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *Dir) beforeSave() {} +func (x *Dir) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Save("children", &x.children) + m.Save("dentryMap", &x.dentryMap) +} + +func (x *Dir) afterLoad() {} +func (x *Dir) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Load("children", &x.children) + m.Load("dentryMap", &x.dentryMap) +} + +func (x *dirFileOperations) beforeSave() {} +func (x *dirFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("dirCursor", &x.dirCursor) + m.Save("dir", &x.dir) +} + +func (x *dirFileOperations) afterLoad() {} +func (x *dirFileOperations) load(m state.Map) { + m.Load("dirCursor", &x.dirCursor) + m.Load("dir", &x.dir) +} + +func (x *Socket) beforeSave() {} +func (x *Socket) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Save("ep", &x.ep) +} + +func (x *Socket) afterLoad() {} +func (x *Socket) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Load("ep", &x.ep) +} + +func (x *socketFileOperations) beforeSave() {} +func (x *socketFileOperations) save(m state.Map) { + x.beforeSave() +} + +func (x *socketFileOperations) afterLoad() {} +func (x *socketFileOperations) load(m state.Map) { +} + +func (x *Symlink) beforeSave() {} +func (x *Symlink) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Save("Target", &x.Target) +} + +func (x *Symlink) afterLoad() {} +func (x *Symlink) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Load("Target", &x.Target) +} + +func (x *symlinkFileOperations) beforeSave() {} +func (x *symlinkFileOperations) save(m state.Map) { + x.beforeSave() +} + +func (x *symlinkFileOperations) afterLoad() {} +func (x *symlinkFileOperations) load(m state.Map) { +} + +func init() { + state.Register("ramfs.Dir", (*Dir)(nil), state.Fns{Save: (*Dir).save, Load: (*Dir).load}) + state.Register("ramfs.dirFileOperations", (*dirFileOperations)(nil), state.Fns{Save: (*dirFileOperations).save, Load: (*dirFileOperations).load}) + state.Register("ramfs.Socket", (*Socket)(nil), state.Fns{Save: (*Socket).save, Load: (*Socket).load}) + state.Register("ramfs.socketFileOperations", (*socketFileOperations)(nil), state.Fns{Save: (*socketFileOperations).save, Load: (*socketFileOperations).load}) + state.Register("ramfs.Symlink", (*Symlink)(nil), state.Fns{Save: (*Symlink).save, Load: (*Symlink).load}) + state.Register("ramfs.symlinkFileOperations", (*symlinkFileOperations)(nil), state.Fns{Save: (*symlinkFileOperations).save, Load: (*symlinkFileOperations).load}) +} diff --git a/pkg/sentry/fs/ramfs/tree_test.go b/pkg/sentry/fs/ramfs/tree_test.go deleted file mode 100644 index 61a7e2900..000000000 --- a/pkg/sentry/fs/ramfs/tree_test.go +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ramfs - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -func TestMakeDirectoryTree(t *testing.T) { - - for _, test := range []struct { - name string - subdirs []string - }{ - { - name: "abs paths", - subdirs: []string{ - "/tmp", - "/tmp/a/b", - "/tmp/a/c/d", - "/tmp/c", - "/proc", - "/dev/a/b", - "/tmp", - }, - }, - { - name: "rel paths", - subdirs: []string{ - "tmp", - "tmp/a/b", - "tmp/a/c/d", - "tmp/c", - "proc", - "dev/a/b", - "tmp", - }, - }, - } { - ctx := contexttest.Context(t) - mount := fs.NewPseudoMountSource(ctx) - tree, err := MakeDirectoryTree(ctx, mount, test.subdirs) - if err != nil { - t.Errorf("%s: failed to make ramfs tree, got error %v, want nil", test.name, err) - continue - } - - // Expect to be able to find each of the paths. - mm, err := fs.NewMountNamespace(ctx, tree) - if err != nil { - t.Errorf("%s: failed to create mount manager: %v", test.name, err) - continue - } - root := mm.Root() - defer mm.DecRef() - - for _, p := range test.subdirs { - maxTraversals := uint(0) - if _, err := mm.FindInode(ctx, root, nil, p, &maxTraversals); err != nil { - t.Errorf("%s: failed to find node %s: %v", test.name, p, err) - break - } - } - } -} diff --git a/pkg/sentry/fs/sys/BUILD b/pkg/sentry/fs/sys/BUILD deleted file mode 100644 index 70fa3af89..000000000 --- a/pkg/sentry/fs/sys/BUILD +++ /dev/null @@ -1,25 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "sys", - srcs = [ - "device.go", - "devices.go", - "fs.go", - "sys.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/sys", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/kernel", - "//pkg/sentry/usermem", - ], -) diff --git a/pkg/sentry/fs/sys/sys_state_autogen.go b/pkg/sentry/fs/sys/sys_state_autogen.go new file mode 100755 index 000000000..603057309 --- /dev/null +++ b/pkg/sentry/fs/sys/sys_state_autogen.go @@ -0,0 +1,34 @@ +// automatically generated by stateify. + +package sys + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *cpunum) beforeSave() {} +func (x *cpunum) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("InodeStaticFileGetter", &x.InodeStaticFileGetter) +} + +func (x *cpunum) afterLoad() {} +func (x *cpunum) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("InodeStaticFileGetter", &x.InodeStaticFileGetter) +} + +func (x *filesystem) beforeSave() {} +func (x *filesystem) save(m state.Map) { + x.beforeSave() +} + +func (x *filesystem) afterLoad() {} +func (x *filesystem) load(m state.Map) { +} + +func init() { + state.Register("sys.cpunum", (*cpunum)(nil), state.Fns{Save: (*cpunum).save, Load: (*cpunum).load}) + state.Register("sys.filesystem", (*filesystem)(nil), state.Fns{Save: (*filesystem).save, Load: (*filesystem).load}) +} diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD deleted file mode 100644 index 1d80daeaf..000000000 --- a/pkg/sentry/fs/timerfd/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "timerfd", - srcs = ["timerfd.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/timerfd", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel/time", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/timerfd/timerfd_state_autogen.go b/pkg/sentry/fs/timerfd/timerfd_state_autogen.go new file mode 100755 index 000000000..e8d98af97 --- /dev/null +++ b/pkg/sentry/fs/timerfd/timerfd_state_autogen.go @@ -0,0 +1,25 @@ +// automatically generated by stateify. + +package timerfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *TimerOperations) beforeSave() {} +func (x *TimerOperations) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.events) { m.Failf("events is %v, expected zero", x.events) } + m.Save("timer", &x.timer) + m.Save("val", &x.val) +} + +func (x *TimerOperations) afterLoad() {} +func (x *TimerOperations) load(m state.Map) { + m.Load("timer", &x.timer) + m.Load("val", &x.val) +} + +func init() { + state.Register("timerfd.TimerOperations", (*TimerOperations)(nil), state.Fns{Save: (*TimerOperations).save, Load: (*TimerOperations).load}) +} diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD deleted file mode 100644 index 8f7eb5757..000000000 --- a/pkg/sentry/fs/tmpfs/BUILD +++ /dev/null @@ -1,50 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "tmpfs", - srcs = [ - "device.go", - "file_regular.go", - "fs.go", - "inode_file.go", - "tmpfs.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/metric", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/safemem", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "tmpfs_test", - size = "small", - srcs = ["file_test.go"], - embed = [":tmpfs"], - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/contexttest", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - ], -) diff --git a/pkg/sentry/fs/tmpfs/file_test.go b/pkg/sentry/fs/tmpfs/file_test.go deleted file mode 100644 index 0075ef023..000000000 --- a/pkg/sentry/fs/tmpfs/file_test.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "bytes" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -func newFileInode(ctx context.Context) *fs.Inode { - m := fs.NewCachingMountSource(ctx, &Filesystem{}, fs.MountSourceFlags{}) - iops := NewInMemoryFile(ctx, usage.Tmpfs, fs.WithCurrentTime(ctx, fs.UnstableAttr{})) - return fs.NewInode(ctx, iops, m, fs.StableAttr{ - DeviceID: tmpfsDevice.DeviceID(), - InodeID: tmpfsDevice.NextIno(), - BlockSize: usermem.PageSize, - Type: fs.RegularFile, - }) -} - -func newFile(ctx context.Context) *fs.File { - inode := newFileInode(ctx) - f, _ := inode.GetFile(ctx, fs.NewDirent(ctx, inode, "stub"), fs.FileFlags{Read: true, Write: true}) - return f -} - -// Allocate once, write twice. -func TestGrow(t *testing.T) { - ctx := contexttest.Context(t) - f := newFile(ctx) - defer f.DecRef() - - abuf := bytes.Repeat([]byte{'a'}, 68) - n, err := f.Pwritev(ctx, usermem.BytesIOSequence(abuf), 0) - if n != int64(len(abuf)) || err != nil { - t.Fatalf("Pwritev got (%d, %v) want (%d, nil)", n, err, len(abuf)) - } - - bbuf := bytes.Repeat([]byte{'b'}, 856) - n, err = f.Pwritev(ctx, usermem.BytesIOSequence(bbuf), 68) - if n != int64(len(bbuf)) || err != nil { - t.Fatalf("Pwritev got (%d, %v) want (%d, nil)", n, err, len(bbuf)) - } - - rbuf := make([]byte, len(abuf)+len(bbuf)) - n, err = f.Preadv(ctx, usermem.BytesIOSequence(rbuf), 0) - if n != int64(len(rbuf)) || err != nil { - t.Fatalf("Preadv got (%d, %v) want (%d, nil)", n, err, len(rbuf)) - } - - if want := append(abuf, bbuf...); !bytes.Equal(rbuf, want) { - t.Fatalf("Read %v, want %v", rbuf, want) - } -} diff --git a/pkg/sentry/fs/tmpfs/tmpfs_state_autogen.go b/pkg/sentry/fs/tmpfs/tmpfs_state_autogen.go new file mode 100755 index 000000000..7d73d1c77 --- /dev/null +++ b/pkg/sentry/fs/tmpfs/tmpfs_state_autogen.go @@ -0,0 +1,108 @@ +// automatically generated by stateify. + +package tmpfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *regularFileOperations) beforeSave() {} +func (x *regularFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("iops", &x.iops) +} + +func (x *regularFileOperations) afterLoad() {} +func (x *regularFileOperations) load(m state.Map) { + m.Load("iops", &x.iops) +} + +func (x *Filesystem) beforeSave() {} +func (x *Filesystem) save(m state.Map) { + x.beforeSave() +} + +func (x *Filesystem) afterLoad() {} +func (x *Filesystem) load(m state.Map) { +} + +func (x *fileInodeOperations) beforeSave() {} +func (x *fileInodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Save("kernel", &x.kernel) + m.Save("memUsage", &x.memUsage) + m.Save("attr", &x.attr) + m.Save("mappings", &x.mappings) + m.Save("writableMappingPages", &x.writableMappingPages) + m.Save("data", &x.data) + m.Save("seals", &x.seals) +} + +func (x *fileInodeOperations) afterLoad() {} +func (x *fileInodeOperations) load(m state.Map) { + m.Load("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Load("kernel", &x.kernel) + m.Load("memUsage", &x.memUsage) + m.Load("attr", &x.attr) + m.Load("mappings", &x.mappings) + m.Load("writableMappingPages", &x.writableMappingPages) + m.Load("data", &x.data) + m.Load("seals", &x.seals) +} + +func (x *Dir) beforeSave() {} +func (x *Dir) save(m state.Map) { + x.beforeSave() + m.Save("ramfsDir", &x.ramfsDir) + m.Save("kernel", &x.kernel) +} + +func (x *Dir) load(m state.Map) { + m.Load("ramfsDir", &x.ramfsDir) + m.Load("kernel", &x.kernel) + m.AfterLoad(x.afterLoad) +} + +func (x *Symlink) beforeSave() {} +func (x *Symlink) save(m state.Map) { + x.beforeSave() + m.Save("Symlink", &x.Symlink) +} + +func (x *Symlink) afterLoad() {} +func (x *Symlink) load(m state.Map) { + m.Load("Symlink", &x.Symlink) +} + +func (x *Socket) beforeSave() {} +func (x *Socket) save(m state.Map) { + x.beforeSave() + m.Save("Socket", &x.Socket) +} + +func (x *Socket) afterLoad() {} +func (x *Socket) load(m state.Map) { + m.Load("Socket", &x.Socket) +} + +func (x *Fifo) beforeSave() {} +func (x *Fifo) save(m state.Map) { + x.beforeSave() + m.Save("InodeOperations", &x.InodeOperations) +} + +func (x *Fifo) afterLoad() {} +func (x *Fifo) load(m state.Map) { + m.Load("InodeOperations", &x.InodeOperations) +} + +func init() { + state.Register("tmpfs.regularFileOperations", (*regularFileOperations)(nil), state.Fns{Save: (*regularFileOperations).save, Load: (*regularFileOperations).load}) + state.Register("tmpfs.Filesystem", (*Filesystem)(nil), state.Fns{Save: (*Filesystem).save, Load: (*Filesystem).load}) + state.Register("tmpfs.fileInodeOperations", (*fileInodeOperations)(nil), state.Fns{Save: (*fileInodeOperations).save, Load: (*fileInodeOperations).load}) + state.Register("tmpfs.Dir", (*Dir)(nil), state.Fns{Save: (*Dir).save, Load: (*Dir).load}) + state.Register("tmpfs.Symlink", (*Symlink)(nil), state.Fns{Save: (*Symlink).save, Load: (*Symlink).load}) + state.Register("tmpfs.Socket", (*Socket)(nil), state.Fns{Save: (*Socket).save, Load: (*Socket).load}) + state.Register("tmpfs.Fifo", (*Fifo)(nil), state.Fns{Save: (*Fifo).save, Load: (*Fifo).load}) +} diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD deleted file mode 100644 index 291164986..000000000 --- a/pkg/sentry/fs/tty/BUILD +++ /dev/null @@ -1,47 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "tty", - srcs = [ - "dir.go", - "fs.go", - "line_discipline.go", - "master.go", - "queue.go", - "slave.go", - "terminal.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/tty", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/refs", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/safemem", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/unimpl", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "tty_test", - size = "small", - srcs = ["tty_test.go"], - embed = [":tty"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/usermem", - ], -) diff --git a/pkg/sentry/fs/tty/tty_state_autogen.go b/pkg/sentry/fs/tty/tty_state_autogen.go new file mode 100755 index 000000000..c54600104 --- /dev/null +++ b/pkg/sentry/fs/tty/tty_state_autogen.go @@ -0,0 +1,206 @@ +// automatically generated by stateify. + +package tty + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *dirInodeOperations) beforeSave() {} +func (x *dirInodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("msrc", &x.msrc) + m.Save("master", &x.master) + m.Save("slaves", &x.slaves) + m.Save("dentryMap", &x.dentryMap) + m.Save("next", &x.next) +} + +func (x *dirInodeOperations) afterLoad() {} +func (x *dirInodeOperations) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("msrc", &x.msrc) + m.Load("master", &x.master) + m.Load("slaves", &x.slaves) + m.Load("dentryMap", &x.dentryMap) + m.Load("next", &x.next) +} + +func (x *dirFileOperations) beforeSave() {} +func (x *dirFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("di", &x.di) + m.Save("dirCursor", &x.dirCursor) +} + +func (x *dirFileOperations) afterLoad() {} +func (x *dirFileOperations) load(m state.Map) { + m.Load("di", &x.di) + m.Load("dirCursor", &x.dirCursor) +} + +func (x *filesystem) beforeSave() {} +func (x *filesystem) save(m state.Map) { + x.beforeSave() +} + +func (x *filesystem) afterLoad() {} +func (x *filesystem) load(m state.Map) { +} + +func (x *superOperations) beforeSave() {} +func (x *superOperations) save(m state.Map) { + x.beforeSave() +} + +func (x *superOperations) afterLoad() {} +func (x *superOperations) load(m state.Map) { +} + +func (x *lineDiscipline) beforeSave() {} +func (x *lineDiscipline) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.masterWaiter) { m.Failf("masterWaiter is %v, expected zero", x.masterWaiter) } + if !state.IsZeroValue(x.slaveWaiter) { m.Failf("slaveWaiter is %v, expected zero", x.slaveWaiter) } + m.Save("size", &x.size) + m.Save("inQueue", &x.inQueue) + m.Save("outQueue", &x.outQueue) + m.Save("termios", &x.termios) + m.Save("column", &x.column) +} + +func (x *lineDiscipline) afterLoad() {} +func (x *lineDiscipline) load(m state.Map) { + m.Load("size", &x.size) + m.Load("inQueue", &x.inQueue) + m.Load("outQueue", &x.outQueue) + m.Load("termios", &x.termios) + m.Load("column", &x.column) +} + +func (x *outputQueueTransformer) beforeSave() {} +func (x *outputQueueTransformer) save(m state.Map) { + x.beforeSave() +} + +func (x *outputQueueTransformer) afterLoad() {} +func (x *outputQueueTransformer) load(m state.Map) { +} + +func (x *inputQueueTransformer) beforeSave() {} +func (x *inputQueueTransformer) save(m state.Map) { + x.beforeSave() +} + +func (x *inputQueueTransformer) afterLoad() {} +func (x *inputQueueTransformer) load(m state.Map) { +} + +func (x *masterInodeOperations) beforeSave() {} +func (x *masterInodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("d", &x.d) +} + +func (x *masterInodeOperations) afterLoad() {} +func (x *masterInodeOperations) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("d", &x.d) +} + +func (x *masterFileOperations) beforeSave() {} +func (x *masterFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("d", &x.d) + m.Save("t", &x.t) +} + +func (x *masterFileOperations) afterLoad() {} +func (x *masterFileOperations) load(m state.Map) { + m.Load("d", &x.d) + m.Load("t", &x.t) +} + +func (x *queue) beforeSave() {} +func (x *queue) save(m state.Map) { + x.beforeSave() + m.Save("readBuf", &x.readBuf) + m.Save("waitBuf", &x.waitBuf) + m.Save("waitBufLen", &x.waitBufLen) + m.Save("readable", &x.readable) + m.Save("transformer", &x.transformer) +} + +func (x *queue) afterLoad() {} +func (x *queue) load(m state.Map) { + m.Load("readBuf", &x.readBuf) + m.Load("waitBuf", &x.waitBuf) + m.Load("waitBufLen", &x.waitBufLen) + m.Load("readable", &x.readable) + m.Load("transformer", &x.transformer) +} + +func (x *slaveInodeOperations) beforeSave() {} +func (x *slaveInodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("d", &x.d) + m.Save("t", &x.t) +} + +func (x *slaveInodeOperations) afterLoad() {} +func (x *slaveInodeOperations) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("d", &x.d) + m.Load("t", &x.t) +} + +func (x *slaveFileOperations) beforeSave() {} +func (x *slaveFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("si", &x.si) +} + +func (x *slaveFileOperations) afterLoad() {} +func (x *slaveFileOperations) load(m state.Map) { + m.Load("si", &x.si) +} + +func (x *Terminal) beforeSave() {} +func (x *Terminal) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("n", &x.n) + m.Save("d", &x.d) + m.Save("ld", &x.ld) + m.Save("masterKTTY", &x.masterKTTY) + m.Save("slaveKTTY", &x.slaveKTTY) +} + +func (x *Terminal) afterLoad() {} +func (x *Terminal) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("n", &x.n) + m.Load("d", &x.d) + m.Load("ld", &x.ld) + m.Load("masterKTTY", &x.masterKTTY) + m.Load("slaveKTTY", &x.slaveKTTY) +} + +func init() { + state.Register("tty.dirInodeOperations", (*dirInodeOperations)(nil), state.Fns{Save: (*dirInodeOperations).save, Load: (*dirInodeOperations).load}) + state.Register("tty.dirFileOperations", (*dirFileOperations)(nil), state.Fns{Save: (*dirFileOperations).save, Load: (*dirFileOperations).load}) + state.Register("tty.filesystem", (*filesystem)(nil), state.Fns{Save: (*filesystem).save, Load: (*filesystem).load}) + state.Register("tty.superOperations", (*superOperations)(nil), state.Fns{Save: (*superOperations).save, Load: (*superOperations).load}) + state.Register("tty.lineDiscipline", (*lineDiscipline)(nil), state.Fns{Save: (*lineDiscipline).save, Load: (*lineDiscipline).load}) + state.Register("tty.outputQueueTransformer", (*outputQueueTransformer)(nil), state.Fns{Save: (*outputQueueTransformer).save, Load: (*outputQueueTransformer).load}) + state.Register("tty.inputQueueTransformer", (*inputQueueTransformer)(nil), state.Fns{Save: (*inputQueueTransformer).save, Load: (*inputQueueTransformer).load}) + state.Register("tty.masterInodeOperations", (*masterInodeOperations)(nil), state.Fns{Save: (*masterInodeOperations).save, Load: (*masterInodeOperations).load}) + state.Register("tty.masterFileOperations", (*masterFileOperations)(nil), state.Fns{Save: (*masterFileOperations).save, Load: (*masterFileOperations).load}) + state.Register("tty.queue", (*queue)(nil), state.Fns{Save: (*queue).save, Load: (*queue).load}) + state.Register("tty.slaveInodeOperations", (*slaveInodeOperations)(nil), state.Fns{Save: (*slaveInodeOperations).save, Load: (*slaveInodeOperations).load}) + state.Register("tty.slaveFileOperations", (*slaveFileOperations)(nil), state.Fns{Save: (*slaveFileOperations).save, Load: (*slaveFileOperations).load}) + state.Register("tty.Terminal", (*Terminal)(nil), state.Fns{Save: (*Terminal).save, Load: (*Terminal).load}) +} diff --git a/pkg/sentry/fs/tty/tty_test.go b/pkg/sentry/fs/tty/tty_test.go deleted file mode 100644 index 59f07ff8e..000000000 --- a/pkg/sentry/fs/tty/tty_test.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tty - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -func TestSimpleMasterToSlave(t *testing.T) { - ld := newLineDiscipline(linux.DefaultSlaveTermios) - ctx := contexttest.Context(t) - inBytes := []byte("hello, tty\n") - src := usermem.BytesIOSequence(inBytes) - outBytes := make([]byte, 32) - dst := usermem.BytesIOSequence(outBytes) - - // Write to the input queue. - nw, err := ld.inputQueueWrite(ctx, src) - if err != nil { - t.Fatalf("error writing to input queue: %v", err) - } - if nw != int64(len(inBytes)) { - t.Fatalf("wrote wrong length: got %d, want %d", nw, len(inBytes)) - } - - // Read from the input queue. - nr, err := ld.inputQueueRead(ctx, dst) - if err != nil { - t.Fatalf("error reading from input queue: %v", err) - } - if nr != int64(len(inBytes)) { - t.Fatalf("read wrong length: got %d, want %d", nr, len(inBytes)) - } - - outStr := string(outBytes[:nr]) - inStr := string(inBytes) - if outStr != inStr { - t.Fatalf("written and read strings do not match: got %q, want %q", outStr, inStr) - } -} diff --git a/pkg/sentry/fsimpl/memfs/BUILD b/pkg/sentry/fsimpl/memfs/BUILD deleted file mode 100644 index d2450e810..000000000 --- a/pkg/sentry/fsimpl/memfs/BUILD +++ /dev/null @@ -1,55 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") - -go_template_instance( - name = "dentry_list", - out = "dentry_list.go", - package = "memfs", - prefix = "dentry", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*dentry", - "Linker": "*dentry", - }, -) - -go_library( - name = "memfs", - srcs = [ - "dentry_list.go", - "directory.go", - "filesystem.go", - "memfs.go", - "regular_file.go", - "symlink.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/memfs", - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/usermem", - "//pkg/sentry/vfs", - "//pkg/syserror", - ], -) - -go_test( - name = "benchmark_test", - size = "small", - srcs = ["benchmark_test.go"], - deps = [ - ":memfs", - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/fsimpl/memfs/benchmark_test.go b/pkg/sentry/fsimpl/memfs/benchmark_test.go deleted file mode 100644 index a94b17db6..000000000 --- a/pkg/sentry/fsimpl/memfs/benchmark_test.go +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package benchmark_test - -import ( - "fmt" - "runtime" - "strings" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/memfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// Differences from stat_benchmark: -// -// - Syscall interception, CopyInPath, copyOutStat, and overlayfs overheads are -// not included. -// -// - *MountStat benchmarks use a tmpfs root mount and a tmpfs submount at /tmp. -// Non-MountStat benchmarks use a tmpfs root mount and no submounts. -// stat_benchmark uses a varying root mount, a tmpfs submount at /tmp, and a -// subdirectory /tmp/<top_dir> (assuming TEST_TMPDIR == "/tmp"). Thus -// stat_benchmark at depth 1 does a comparable amount of work to *MountStat -// benchmarks at depth 2, and non-MountStat benchmarks at depth 3. -var depths = []int{1, 2, 3, 8, 64, 100} - -const ( - mountPointName = "tmp" - filename = "gvisor_test_temp_0_1557494568" -) - -// This is copied from syscalls/linux/sys_file.go, with the dependency on -// kernel.Task stripped out. -func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error { - var ( - d *fs.Dirent // The file. - rel *fs.Dirent // The relative directory for search (if required.) - err error - ) - - // Extract the working directory (maybe). - if len(path) > 0 && path[0] == '/' { - // Absolute path; rel can be nil. - } else if dirFD == linux.AT_FDCWD { - // Need to reference the working directory. - rel = wd - } else { - // Need to extract the given FD. - return syserror.EBADF - } - - // Lookup the node. - remainingTraversals := uint(linux.MaxSymlinkTraversals) - if resolve { - d, err = mntns.FindInode(ctx, root, rel, path, &remainingTraversals) - } else { - d, err = mntns.FindLink(ctx, root, rel, path, &remainingTraversals) - } - if err != nil { - return err - } - - err = fn(root, d) - d.DecRef() - return err -} - -func BenchmarkVFS1TmpfsStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - - // Create VFS. - tmpfsFS, ok := fs.FindFilesystem("tmpfs") - if !ok { - b.Fatalf("failed to find tmpfs filesystem type") - } - rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - mntns, err := fs.NewMountNamespace(ctx, rootInode) - if err != nil { - b.Fatalf("failed to create mount namespace: %v", err) - } - defer mntns.DecRef() - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - root := mntns.Root() - defer root.DecRef() - d := root - d.IncRef() - defer d.DecRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - next, err := d.Walk(ctx, root, name) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - d.DecRef() - d = next - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644)) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - file.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - dirPath := false - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { - if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR - } - uattr, err := d.Inode.UnstableAttr(ctx) - if err != nil { - return err - } - // Sanity check. - if uattr.Perms.User.Execute { - b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode()) - } - return nil - }) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - } - }) - } -} - -func BenchmarkVFS2MemfsStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("memfs", memfs.FilesystemType{}) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.NewFilesystemOptions{}) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - root := mntns.Root() - defer root.DecRef() - vd := root - vd.IncRef() - defer vd.DecRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - pop := vfs.PathOperation{ - Root: root, - Start: vd, - Pathname: name, - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - vd.DecRef() - vd = nextVD - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: vd, - Pathname: filename, - FollowFinalSymlink: true, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: 0644, - }) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - defer fd.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Pathname: filePath, - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - // Sanity check. - if stat.Mode&^linux.S_IFMT != 0644 { - b.Fatalf("got wrong permissions (%0o)", stat.Mode) - } - } - }) - } -} - -func BenchmarkVFS1TmpfsMountStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - - // Create VFS. - tmpfsFS, ok := fs.FindFilesystem("tmpfs") - if !ok { - b.Fatalf("failed to find tmpfs filesystem type") - } - rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - mntns, err := fs.NewMountNamespace(ctx, rootInode) - if err != nil { - b.Fatalf("failed to create mount namespace: %v", err) - } - defer mntns.DecRef() - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create and mount the submount. - root := mntns.Root() - defer root.DecRef() - if err := root.Inode.CreateDirectory(ctx, root, mountPointName, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create mount point: %v", err) - } - mountPoint, err := root.Walk(ctx, root, mountPointName) - if err != nil { - b.Fatalf("failed to walk to mount point: %v", err) - } - defer mountPoint.DecRef() - submountInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs submount: %v", err) - } - if err := mntns.Mount(ctx, mountPoint, submountInode); err != nil { - b.Fatalf("failed to mount tmpfs submount: %v", err) - } - filePathBuilder.WriteString(mountPointName) - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - d, err := root.Walk(ctx, root, mountPointName) - if err != nil { - b.Fatalf("failed to walk to mount root: %v", err) - } - defer d.DecRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - next, err := d.Walk(ctx, root, name) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - d.DecRef() - d = next - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644)) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - file.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - dirPath := false - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { - if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR - } - uattr, err := d.Inode.UnstableAttr(ctx) - if err != nil { - return err - } - // Sanity check. - if uattr.Perms.User.Execute { - b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode()) - } - return nil - }) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - } - }) - } -} - -func BenchmarkVFS2MemfsMountStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("memfs", memfs.FilesystemType{}) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.NewFilesystemOptions{}) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create the mount point. - root := mntns.Root() - defer root.DecRef() - pop := vfs.PathOperation{ - Root: root, - Start: root, - Pathname: mountPointName, - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create mount point: %v", err) - } - // Save the mount point for later use. - mountPoint, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to mount point: %v", err) - } - defer mountPoint.DecRef() - // Create and mount the submount. - if err := vfsObj.NewMount(ctx, creds, "", &pop, "memfs", &vfs.NewFilesystemOptions{}); err != nil { - b.Fatalf("failed to mount tmpfs submount: %v", err) - } - filePathBuilder.WriteString(mountPointName) - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - vd, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to mount root: %v", err) - } - defer vd.DecRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - pop := vfs.PathOperation{ - Root: root, - Start: vd, - Pathname: name, - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - vd.DecRef() - vd = nextVD - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Verify that we didn't create any directories under the mount - // point (i.e. they were all created on the submount). - firstDirName := fmt.Sprintf("%d", depth) - if child := mountPoint.Dentry().Child(firstDirName); child != nil { - b.Fatalf("created directory %q under root mount, not submount", firstDirName) - } - - // Create the file that will be stat'd. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: vd, - Pathname: filename, - FollowFinalSymlink: true, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: 0644, - }) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - fd.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Pathname: filePath, - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - // Sanity check. - if stat.Mode&^linux.S_IFMT != 0644 { - b.Fatalf("got wrong permissions (%0o)", stat.Mode) - } - } - }) - } -} diff --git a/pkg/sentry/fsimpl/memfs/directory.go b/pkg/sentry/fsimpl/memfs/directory.go deleted file mode 100644 index c52dc781c..000000000 --- a/pkg/sentry/fsimpl/memfs/directory.go +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -type directory struct { - inode inode - - // childList is a list containing (1) child Dentries and (2) fake Dentries - // (with inode == nil) that represent the iteration position of - // directoryFDs. childList is used to support directoryFD.IterDirents() - // efficiently. childList is protected by filesystem.mu. - childList dentryList -} - -func (fs *filesystem) newDirectory(creds *auth.Credentials, mode uint16) *inode { - dir := &directory{} - dir.inode.init(dir, fs, creds, mode) - dir.inode.nlink = 2 // from "." and parent directory or ".." for root - return &dir.inode -} - -func (i *inode) isDir() bool { - _, ok := i.impl.(*directory) - return ok -} - -type directoryFD struct { - fileDescription - vfs.DirectoryFileDescriptionDefaultImpl - - // Protected by filesystem.mu. - iter *dentry - off int64 -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *directoryFD) Release() { - if fd.iter != nil { - fs := fd.filesystem() - dir := fd.inode().impl.(*directory) - fs.mu.Lock() - dir.childList.Remove(fd.iter) - fs.mu.Unlock() - fd.iter = nil - } -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - fs := fd.filesystem() - vfsd := fd.vfsfd.VirtualDentry().Dentry() - - fs.mu.Lock() - defer fs.mu.Unlock() - - if fd.off == 0 { - if !cb.Handle(vfs.Dirent{ - Name: ".", - Type: linux.DT_DIR, - Ino: vfsd.Impl().(*dentry).inode.ino, - Off: 0, - }) { - return nil - } - fd.off++ - } - if fd.off == 1 { - parentInode := vfsd.ParentOrSelf().Impl().(*dentry).inode - if !cb.Handle(vfs.Dirent{ - Name: "..", - Type: parentInode.direntType(), - Ino: parentInode.ino, - Off: 1, - }) { - return nil - } - fd.off++ - } - - dir := vfsd.Impl().(*dentry).inode.impl.(*directory) - var child *dentry - if fd.iter == nil { - // Start iteration at the beginning of dir. - child = dir.childList.Front() - fd.iter = &dentry{} - } else { - // Continue iteration from where we left off. - child = fd.iter.Next() - dir.childList.Remove(fd.iter) - } - for child != nil { - // Skip other directoryFD iterators. - if child.inode != nil { - if !cb.Handle(vfs.Dirent{ - Name: child.vfsd.Name(), - Type: child.inode.direntType(), - Ino: child.inode.ino, - Off: fd.off, - }) { - dir.childList.InsertBefore(child, fd.iter) - return nil - } - fd.off++ - } - child = child.Next() - } - dir.childList.PushBack(fd.iter) - return nil -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fs := fd.filesystem() - fs.mu.Lock() - defer fs.mu.Unlock() - - switch whence { - case linux.SEEK_SET: - // Use offset as given. - case linux.SEEK_CUR: - offset += fd.off - default: - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - - // If the offset isn't changing (e.g. due to lseek(0, SEEK_CUR)), don't - // seek even if doing so might reposition the iterator due to concurrent - // mutation of the directory. Compare fs/libfs.c:dcache_dir_lseek(). - if fd.off == offset { - return offset, nil - } - - fd.off = offset - // Compensate for "." and "..". - remChildren := int64(0) - if offset >= 2 { - remChildren = offset - 2 - } - - dir := fd.inode().impl.(*directory) - - // Ensure that fd.iter exists and is not linked into dir.childList. - if fd.iter == nil { - fd.iter = &dentry{} - } else { - dir.childList.Remove(fd.iter) - } - // Insert fd.iter before the remChildren'th child, or at the end of the - // list if remChildren >= number of children. - child := dir.childList.Front() - for child != nil { - // Skip other directoryFD iterators. - if child.inode != nil { - if remChildren == 0 { - dir.childList.InsertBefore(child, fd.iter) - return offset, nil - } - remChildren-- - } - child = child.Next() - } - dir.childList.PushBack(fd.iter) - return offset, nil -} diff --git a/pkg/sentry/fsimpl/memfs/filesystem.go b/pkg/sentry/fsimpl/memfs/filesystem.go deleted file mode 100644 index f79e2d9c8..000000000 --- a/pkg/sentry/fsimpl/memfs/filesystem.go +++ /dev/null @@ -1,544 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memfs - -import ( - "fmt" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// stepLocked resolves rp.Component() in parent directory vfsd. -// -// stepLocked is loosely analogous to fs/namei.c:walk_component(). -// -// Preconditions: filesystem.mu must be locked. !rp.Done(). inode == -// vfsd.Impl().(*dentry).inode. -func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode) (*vfs.Dentry, *inode, error) { - if !inode.isDir() { - return nil, nil, syserror.ENOTDIR - } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { - return nil, nil, err - } -afterSymlink: - nextVFSD, err := rp.ResolveComponent(vfsd) - if err != nil { - return nil, nil, err - } - if nextVFSD == nil { - // Since the Dentry tree is the sole source of truth for memfs, if it's - // not in the Dentry tree, it doesn't exist. - return nil, nil, syserror.ENOENT - } - nextInode := nextVFSD.Impl().(*dentry).inode - if symlink, ok := nextInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO: symlink traversals update access time - if err := rp.HandleSymlink(symlink.target); err != nil { - return nil, nil, err - } - goto afterSymlink // don't check the current directory again - } - rp.Advance() - return nextVFSD, nextInode, nil -} - -// walkExistingLocked resolves rp to an existing file. -// -// walkExistingLocked is loosely analogous to Linux's -// fs/namei.c:path_lookupat(). -// -// Preconditions: filesystem.mu must be locked. -func walkExistingLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) { - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - for !rp.Done() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode) - if err != nil { - return nil, nil, err - } - } - if rp.MustBeDir() && !inode.isDir() { - return nil, nil, syserror.ENOTDIR - } - return vfsd, inode, nil -} - -// walkParentDirLocked resolves all but the last path component of rp to an -// existing directory. It does not check that the returned directory is -// searchable by the provider of rp. -// -// walkParentDirLocked is loosely analogous to Linux's -// fs/namei.c:path_parentat(). -// -// Preconditions: filesystem.mu must be locked. !rp.Done(). -func walkParentDirLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) { - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - for !rp.Final() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode) - if err != nil { - return nil, nil, err - } - } - if !inode.isDir() { - return nil, nil, syserror.ENOTDIR - } - return vfsd, inode, nil -} - -// checkCreateLocked checks that a file named rp.Component() may be created in -// directory parentVFSD, then returns rp.Component(). -// -// Preconditions: filesystem.mu must be locked. parentInode == -// parentVFSD.Impl().(*dentry).inode. parentInode.isDir() == true. -func checkCreateLocked(rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode *inode) (string, error) { - if err := parentInode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { - return "", err - } - pc := rp.Component() - if pc == "." || pc == ".." { - return "", syserror.EEXIST - } - childVFSD, err := rp.ResolveChild(parentVFSD, pc) - if err != nil { - return "", err - } - if childVFSD != nil { - return "", syserror.EEXIST - } - if parentVFSD.IsDisowned() { - return "", syserror.ENOENT - } - return pc, nil -} - -// checkDeleteLocked checks that the file represented by vfsd may be deleted. -func checkDeleteLocked(vfsd *vfs.Dentry) error { - parentVFSD := vfsd.Parent() - if parentVFSD == nil { - return syserror.EBUSY - } - if parentVFSD.IsDisowned() { - return syserror.ENOENT - } - return nil -} - -// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. -func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - vfsd, inode, err := walkExistingLocked(rp) - if err != nil { - return nil, err - } - if opts.CheckSearchable { - if !inode.isDir() { - return nil, syserror.ENOTDIR - } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { - return nil, err - } - } - inode.incRef() // vfsd.IncRef(&fs.vfsfs) - return vfsd, nil -} - -// LinkAt implements vfs.FilesystemImpl.LinkAt. -func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if rp.Mount() != vd.Mount() { - return syserror.EXDEV - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - d := vd.Dentry().Impl().(*dentry) - if d.inode.isDir() { - return syserror.EPERM - } - d.inode.incLinksLocked() - child := fs.newDentry(d.inode) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - return nil -} - -// MkdirAt implements vfs.FilesystemImpl.MkdirAt. -func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode)) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - parentInode.incLinksLocked() // from child's ".." - return nil -} - -// MknodAt implements vfs.FilesystemImpl.MknodAt. -func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - _, err = checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - // TODO: actually implement mknod - return syserror.EPERM -} - -// OpenAt implements vfs.FilesystemImpl.OpenAt. -func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - // Filter out flags that are not supported by memfs. O_DIRECTORY and - // O_NOFOLLOW have no effect here (they're handled by VFS by setting - // appropriate bits in rp), but are returned by - // FileDescriptionImpl.StatusFlags(). - opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC | linux.O_DIRECTORY | linux.O_NOFOLLOW - - if opts.Flags&linux.O_CREAT == 0 { - fs.mu.RLock() - defer fs.mu.RUnlock() - vfsd, inode, err := walkExistingLocked(rp) - if err != nil { - return nil, err - } - return inode.open(rp, vfsd, opts.Flags, false) - } - - mustCreate := opts.Flags&linux.O_EXCL != 0 - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - fs.mu.Lock() - defer fs.mu.Unlock() - if rp.Done() { - if rp.MustBeDir() { - return nil, syserror.EISDIR - } - if mustCreate { - return nil, syserror.EEXIST - } - return inode.open(rp, vfsd, opts.Flags, false) - } -afterTrailingSymlink: - // Walk to the parent directory of the last path component. - for !rp.Final() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode) - if err != nil { - return nil, err - } - } - if !inode.isDir() { - return nil, syserror.ENOTDIR - } - // Check for search permission in the parent directory. - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { - return nil, err - } - // Reject attempts to open directories with O_CREAT. - if rp.MustBeDir() { - return nil, syserror.EISDIR - } - pc := rp.Component() - if pc == "." || pc == ".." { - return nil, syserror.EISDIR - } - // Determine whether or not we need to create a file. - childVFSD, err := rp.ResolveChild(vfsd, pc) - if err != nil { - return nil, err - } - if childVFSD == nil { - // Already checked for searchability above; now check for writability. - if err := inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { - return nil, err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return nil, err - } - defer rp.Mount().EndWrite() - // Create and open the child. - childInode := fs.newRegularFile(rp.Credentials(), opts.Mode) - child := fs.newDentry(childInode) - vfsd.InsertChild(&child.vfsd, pc) - inode.impl.(*directory).childList.PushBack(child) - return childInode.open(rp, &child.vfsd, opts.Flags, true) - } - // Open existing file or follow symlink. - if mustCreate { - return nil, syserror.EEXIST - } - childInode := childVFSD.Impl().(*dentry).inode - if symlink, ok := childInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO: symlink traversals update access time - if err := rp.HandleSymlink(symlink.target); err != nil { - return nil, err - } - // rp.Final() may no longer be true since we now need to resolve the - // symlink target. - goto afterTrailingSymlink - } - return childInode.open(rp, childVFSD, opts.Flags, false) -} - -func (i *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { - ats := vfs.AccessTypesForOpenFlags(flags) - if !afterCreate { - if err := i.checkPermissions(rp.Credentials(), ats, i.isDir()); err != nil { - return nil, err - } - } - switch impl := i.impl.(type) { - case *regularFile: - var fd regularFileFD - fd.flags = flags - fd.readable = vfs.MayReadFileWithOpenFlags(flags) - fd.writable = vfs.MayWriteFileWithOpenFlags(flags) - if fd.writable { - if err := rp.Mount().CheckBeginWrite(); err != nil { - return nil, err - } - // Mount.EndWrite() is called by regularFileFD.Release(). - } - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) - if flags&linux.O_TRUNC != 0 { - impl.mu.Lock() - impl.data = impl.data[:0] - atomic.StoreInt64(&impl.dataLen, 0) - impl.mu.Unlock() - } - return &fd.vfsfd, nil - case *directory: - // Can't open directories writably. - if ats&vfs.MayWrite != 0 { - return nil, syserror.EISDIR - } - var fd directoryFD - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) - fd.flags = flags - return &fd.vfsfd, nil - case *symlink: - // Can't open symlinks without O_PATH (which is unimplemented). - return nil, syserror.ELOOP - default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) - } -} - -// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. -func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { - fs.mu.RLock() - _, inode, err := walkExistingLocked(rp) - fs.mu.RUnlock() - if err != nil { - return "", err - } - symlink, ok := inode.impl.(*symlink) - if !ok { - return "", syserror.EINVAL - } - return symlink.target, nil -} - -// RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error { - if rp.Done() { - return syserror.ENOENT - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - _, err = checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - // TODO: actually implement RenameAt - return syserror.EPERM -} - -// RmdirAt implements vfs.FilesystemImpl.RmdirAt. -func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { - fs.mu.Lock() - defer fs.mu.Unlock() - vfsd, inode, err := walkExistingLocked(rp) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - if err := checkDeleteLocked(vfsd); err != nil { - return err - } - if !inode.isDir() { - return syserror.ENOTDIR - } - if vfsd.HasChildren() { - return syserror.ENOTEMPTY - } - if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { - return err - } - // Remove from parent directory's childList. - vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry)) - inode.decRef() - return nil -} - -// SetStatAt implements vfs.FilesystemImpl.SetStatAt. -func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { - fs.mu.RLock() - _, _, err := walkExistingLocked(rp) - fs.mu.RUnlock() - if err != nil { - return err - } - if opts.Stat.Mask == 0 { - return nil - } - // TODO: implement inode.setStat - return syserror.EPERM -} - -// StatAt implements vfs.FilesystemImpl.StatAt. -func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { - fs.mu.RLock() - _, inode, err := walkExistingLocked(rp) - fs.mu.RUnlock() - if err != nil { - return linux.Statx{}, err - } - var stat linux.Statx - inode.statTo(&stat) - return stat, nil -} - -// StatFSAt implements vfs.FilesystemImpl.StatFSAt. -func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { - fs.mu.RLock() - _, _, err := walkExistingLocked(rp) - fs.mu.RUnlock() - if err != nil { - return linux.Statfs{}, err - } - // TODO: actually implement statfs - return linux.Statfs{}, syserror.ENOSYS -} - -// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. -func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - child := fs.newDentry(fs.newSymlink(rp.Credentials(), target)) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - return nil -} - -// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. -func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { - fs.mu.Lock() - defer fs.mu.Unlock() - vfsd, inode, err := walkExistingLocked(rp) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - if err := checkDeleteLocked(vfsd); err != nil { - return err - } - if inode.isDir() { - return syserror.EISDIR - } - if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { - return err - } - // Remove from parent directory's childList. - vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry)) - inode.decLinksLocked() - return nil -} diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go deleted file mode 100644 index 59612da14..000000000 --- a/pkg/sentry/fsimpl/memfs/memfs.go +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package memfs provides a filesystem implementation that behaves like tmpfs: -// the Dentry tree is the sole source of truth for the state of the filesystem. -// -// memfs is intended primarily to demonstrate filesystem implementation -// patterns. Real uses cases for an in-memory filesystem should use tmpfs -// instead. -// -// Lock order: -// -// filesystem.mu -// regularFileFD.offMu -// regularFile.mu -// inode.mu -package memfs - -import ( - "fmt" - "sync" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// FilesystemType implements vfs.FilesystemType. -type FilesystemType struct{} - -// filesystem implements vfs.FilesystemImpl. -type filesystem struct { - vfsfs vfs.Filesystem - - // mu serializes changes to the Dentry tree. - mu sync.RWMutex - - nextInoMinusOne uint64 // accessed using atomic memory operations -} - -// NewFilesystem implements vfs.FilesystemType.NewFilesystem. -func (fstype FilesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts vfs.NewFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - var fs filesystem - fs.vfsfs.Init(&fs) - root := fs.newDentry(fs.newDirectory(creds, 01777)) - return &fs.vfsfs, &root.vfsd, nil -} - -// Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { -} - -// Sync implements vfs.FilesystemImpl.Sync. -func (fs *filesystem) Sync(ctx context.Context) error { - // All filesystem state is in-memory. - return nil -} - -// dentry implements vfs.DentryImpl. -type dentry struct { - vfsd vfs.Dentry - - // inode is the inode represented by this dentry. Multiple Dentries may - // share a single non-directory inode (with hard links). inode is - // immutable. - inode *inode - - // memfs doesn't count references on dentries; because the dentry tree is - // the sole source of truth, it is by definition always consistent with the - // state of the filesystem. However, it does count references on inodes, - // because inode resources are released when all references are dropped. - // (memfs doesn't really have resources to release, but we implement - // reference counting because tmpfs regular files will.) - - // dentryEntry (ugh) links dentries into their parent directory.childList. - dentryEntry -} - -func (fs *filesystem) newDentry(inode *inode) *dentry { - d := &dentry{ - inode: inode, - } - d.vfsd.Init(d) - return d -} - -// IncRef implements vfs.DentryImpl.IncRef. -func (d *dentry) IncRef(vfsfs *vfs.Filesystem) { - d.inode.incRef() -} - -// TryIncRef implements vfs.DentryImpl.TryIncRef. -func (d *dentry) TryIncRef(vfsfs *vfs.Filesystem) bool { - return d.inode.tryIncRef() -} - -// DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef(vfsfs *vfs.Filesystem) { - d.inode.decRef() -} - -// inode represents a filesystem object. -type inode struct { - // refs is a reference count. refs is accessed using atomic memory - // operations. - // - // A reference is held on all inodes that are reachable in the filesystem - // tree. For non-directories (which may have multiple hard links), this - // means that a reference is dropped when nlink reaches 0. For directories, - // nlink never reaches 0 due to the "." entry; instead, - // filesystem.RmdirAt() drops the reference. - refs int64 - - // Inode metadata; protected by mu and accessed using atomic memory - // operations unless otherwise specified. - mu sync.RWMutex - mode uint32 // excluding file type bits, which are based on impl - nlink uint32 // protected by filesystem.mu instead of inode.mu - uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic - gid uint32 // auth.KGID, but ... - ino uint64 // immutable - - impl interface{} // immutable -} - -func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode uint16) { - i.refs = 1 - i.mode = uint32(mode) - i.uid = uint32(creds.EffectiveKUID) - i.gid = uint32(creds.EffectiveKGID) - i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1) - // i.nlink initialized by caller - i.impl = impl -} - -// Preconditions: filesystem.mu must be locked for writing. -func (i *inode) incLinksLocked() { - if atomic.AddUint32(&i.nlink, 1) <= 1 { - panic("memfs.inode.incLinksLocked() called with no existing links") - } -} - -// Preconditions: filesystem.mu must be locked for writing. -func (i *inode) decLinksLocked() { - if nlink := atomic.AddUint32(&i.nlink, ^uint32(0)); nlink == 0 { - i.decRef() - } else if nlink == ^uint32(0) { // negative overflow - panic("memfs.inode.decLinksLocked() called with no existing links") - } -} - -func (i *inode) incRef() { - if atomic.AddInt64(&i.refs, 1) <= 1 { - panic("memfs.inode.incRef() called without holding a reference") - } -} - -func (i *inode) tryIncRef() bool { - for { - refs := atomic.LoadInt64(&i.refs) - if refs == 0 { - return false - } - if atomic.CompareAndSwapInt64(&i.refs, refs, refs+1) { - return true - } - } -} - -func (i *inode) decRef() { - if refs := atomic.AddInt64(&i.refs, -1); refs == 0 { - // This is unnecessary; it's mostly to simulate what tmpfs would do. - if regfile, ok := i.impl.(*regularFile); ok { - regfile.mu.Lock() - regfile.data = nil - atomic.StoreInt64(®file.dataLen, 0) - regfile.mu.Unlock() - } - } else if refs < 0 { - panic("memfs.inode.decRef() called without holding a reference") - } -} - -func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error { - return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))) -} - -// Go won't inline this function, and returning linux.Statx (which is quite -// big) means spending a lot of time in runtime.duffcopy(), so instead it's an -// output parameter. -func (i *inode) statTo(stat *linux.Statx) { - stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO - stat.Blksize = 1 // usermem.PageSize in tmpfs - stat.Nlink = atomic.LoadUint32(&i.nlink) - stat.UID = atomic.LoadUint32(&i.uid) - stat.GID = atomic.LoadUint32(&i.gid) - stat.Mode = uint16(atomic.LoadUint32(&i.mode)) - stat.Ino = i.ino - // TODO: device number - switch impl := i.impl.(type) { - case *regularFile: - stat.Mode |= linux.S_IFREG - stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS - stat.Size = uint64(atomic.LoadInt64(&impl.dataLen)) - // In tmpfs, this will be FileRangeSet.Span() / 512 (but also cached in - // a uint64 accessed using atomic memory operations to avoid taking - // locks). - stat.Blocks = allocatedBlocksForSize(stat.Size) - case *directory: - stat.Mode |= linux.S_IFDIR - case *symlink: - stat.Mode |= linux.S_IFLNK - stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS - stat.Size = uint64(len(impl.target)) - stat.Blocks = allocatedBlocksForSize(stat.Size) - default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) - } -} - -// allocatedBlocksForSize returns the number of 512B blocks needed to -// accommodate the given size in bytes, as appropriate for struct -// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block -// size is independent of the "preferred block size for I/O", struct -// stat::st_blksize and struct statx::stx_blksize.) -func allocatedBlocksForSize(size uint64) uint64 { - return (size + 511) / 512 -} - -func (i *inode) direntType() uint8 { - switch i.impl.(type) { - case *regularFile: - return linux.DT_REG - case *directory: - return linux.DT_DIR - case *symlink: - return linux.DT_LNK - default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) - } -} - -// fileDescription is embedded by memfs implementations of -// vfs.FileDescriptionImpl. -type fileDescription struct { - vfsfd vfs.FileDescription - - flags uint32 // status flags; immutable -} - -func (fd *fileDescription) filesystem() *filesystem { - return fd.vfsfd.VirtualDentry().Mount().Filesystem().Impl().(*filesystem) -} - -func (fd *fileDescription) inode() *inode { - return fd.vfsfd.VirtualDentry().Dentry().Impl().(*dentry).inode -} - -// StatusFlags implements vfs.FileDescriptionImpl.StatusFlags. -func (fd *fileDescription) StatusFlags(ctx context.Context) (uint32, error) { - return fd.flags, nil -} - -// SetStatusFlags implements vfs.FileDescriptionImpl.SetStatusFlags. -func (fd *fileDescription) SetStatusFlags(ctx context.Context, flags uint32) error { - // None of the flags settable by fcntl(F_SETFL) are supported, so this is a - // no-op. - return nil -} - -// Stat implements vfs.FileDescriptionImpl.Stat. -func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { - var stat linux.Statx - fd.inode().statTo(&stat) - return stat, nil -} - -// SetStat implements vfs.FileDescriptionImpl.SetStat. -func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - if opts.Stat.Mask == 0 { - return nil - } - // TODO: implement inode.setStat - return syserror.EPERM -} diff --git a/pkg/sentry/fsimpl/memfs/regular_file.go b/pkg/sentry/fsimpl/memfs/regular_file.go deleted file mode 100644 index 7a16d5719..000000000 --- a/pkg/sentry/fsimpl/memfs/regular_file.go +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memfs - -import ( - "io" - "sync" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -type regularFile struct { - inode inode - - mu sync.RWMutex - data []byte - // dataLen is len(data), but accessed using atomic memory operations to - // avoid locking in inode.stat(). - dataLen int64 -} - -func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode uint16) *inode { - file := ®ularFile{} - file.inode.init(file, fs, creds, mode) - file.inode.nlink = 1 // from parent directory - return &file.inode -} - -type regularFileFD struct { - fileDescription - vfs.FileDescriptionDefaultImpl - - // These are immutable. - readable bool - writable bool - - // off is the file offset. off is accessed using atomic memory operations. - // offMu serializes operations that may mutate off. - off int64 - offMu sync.Mutex -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *regularFileFD) Release() { - if fd.writable { - fd.vfsfd.VirtualDentry().Mount().EndWrite() - } -} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - if !fd.readable { - return 0, syserror.EINVAL - } - f := fd.inode().impl.(*regularFile) - f.mu.RLock() - if offset >= int64(len(f.data)) { - f.mu.RUnlock() - return 0, io.EOF - } - n, err := dst.CopyOut(ctx, f.data[offset:]) - f.mu.RUnlock() - return int64(n), err -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - fd.offMu.Lock() - n, err := fd.PRead(ctx, dst, fd.off, opts) - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - if !fd.writable { - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - srclen := src.NumBytes() - if srclen == 0 { - return 0, nil - } - f := fd.inode().impl.(*regularFile) - f.mu.Lock() - end := offset + srclen - if end < offset { - // Overflow. - f.mu.Unlock() - return 0, syserror.EFBIG - } - if end > f.dataLen { - f.data = append(f.data, make([]byte, end-f.dataLen)...) - atomic.StoreInt64(&f.dataLen, end) - } - n, err := src.CopyIn(ctx, f.data[offset:end]) - f.mu.Unlock() - return int64(n), err -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - fd.offMu.Lock() - n, err := fd.PWrite(ctx, src, fd.off, opts) - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fd.offMu.Lock() - defer fd.offMu.Unlock() - switch whence { - case linux.SEEK_SET: - // use offset as specified - case linux.SEEK_CUR: - offset += fd.off - case linux.SEEK_END: - offset += atomic.LoadInt64(&fd.inode().impl.(*regularFile).dataLen) - default: - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - fd.off = offset - return offset, nil -} - -// Sync implements vfs.FileDescriptionImpl.Sync. -func (fd *regularFileFD) Sync(ctx context.Context) error { - return nil -} diff --git a/pkg/sentry/fsimpl/memfs/symlink.go b/pkg/sentry/fsimpl/memfs/symlink.go deleted file mode 100644 index b2ac2cbeb..000000000 --- a/pkg/sentry/fsimpl/memfs/symlink.go +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memfs - -import ( - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" -) - -type symlink struct { - inode inode - target string // immutable -} - -func (fs *filesystem) newSymlink(creds *auth.Credentials, target string) *inode { - link := &symlink{ - target: target, - } - link.inode.init(link, fs, creds, 0777) - link.inode.nlink = 1 // from parent directory - return &link.inode -} - -// O_PATH is unimplemented, so there's no way to get a FileDescription -// representing a symlink yet. diff --git a/pkg/sentry/hostcpu/BUILD b/pkg/sentry/hostcpu/BUILD deleted file mode 100644 index f989f2f8b..000000000 --- a/pkg/sentry/hostcpu/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "hostcpu", - srcs = [ - "getcpu_amd64.s", - "hostcpu.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/hostcpu", - visibility = ["//:sandbox"], -) - -go_test( - name = "hostcpu_test", - size = "small", - srcs = ["hostcpu_test.go"], - embed = [":hostcpu"], -) diff --git a/pkg/sentry/hostcpu/hostcpu_state_autogen.go b/pkg/sentry/hostcpu/hostcpu_state_autogen.go new file mode 100755 index 000000000..f04a56ec0 --- /dev/null +++ b/pkg/sentry/hostcpu/hostcpu_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package hostcpu + diff --git a/pkg/sentry/hostcpu/hostcpu_test.go b/pkg/sentry/hostcpu/hostcpu_test.go deleted file mode 100644 index 7d6885c9e..000000000 --- a/pkg/sentry/hostcpu/hostcpu_test.go +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package hostcpu - -import ( - "fmt" - "testing" -) - -func TestMaxValueInLinuxBitmap(t *testing.T) { - for _, test := range []struct { - str string - max uint64 - }{ - {"0", 0}, - {"0\n", 0}, - {"0,2", 2}, - {"0-63", 63}, - {"0-3,8-11", 11}, - } { - t.Run(fmt.Sprintf("%q", test.str), func(t *testing.T) { - max, err := maxValueInLinuxBitmap(test.str) - if err != nil || max != test.max { - t.Errorf("maxValueInLinuxBitmap: got (%d, %v), wanted (%d, nil)", max, err, test.max) - } - }) - } -} - -func TestMaxValueInLinuxBitmapErrors(t *testing.T) { - for _, str := range []string{"", "\n"} { - t.Run(fmt.Sprintf("%q", str), func(t *testing.T) { - max, err := maxValueInLinuxBitmap(str) - if err == nil { - t.Errorf("maxValueInLinuxBitmap: got (%d, nil), wanted (_, error)", max) - } - t.Log(err) - }) - } -} diff --git a/pkg/sentry/hostmm/BUILD b/pkg/sentry/hostmm/BUILD deleted file mode 100644 index 67831d5a1..000000000 --- a/pkg/sentry/hostmm/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "hostmm", - srcs = [ - "cgroup.go", - "hostmm.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/hostmm", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/fd", - "//pkg/log", - "//pkg/sentry/usermem", - ], -) diff --git a/pkg/sentry/hostmm/hostmm_state_autogen.go b/pkg/sentry/hostmm/hostmm_state_autogen.go new file mode 100755 index 000000000..730de5101 --- /dev/null +++ b/pkg/sentry/hostmm/hostmm_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package hostmm + diff --git a/pkg/sentry/inet/BUILD b/pkg/sentry/inet/BUILD deleted file mode 100644 index 184b566d9..000000000 --- a/pkg/sentry/inet/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -package( - default_visibility = ["//:sandbox"], - licenses = ["notice"], -) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "inet", - srcs = [ - "context.go", - "inet.go", - "test_stack.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/inet", - deps = ["//pkg/sentry/context"], -) diff --git a/pkg/sentry/inet/inet_state_autogen.go b/pkg/sentry/inet/inet_state_autogen.go new file mode 100755 index 000000000..00f40556c --- /dev/null +++ b/pkg/sentry/inet/inet_state_autogen.go @@ -0,0 +1,26 @@ +// automatically generated by stateify. + +package inet + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *TCPBufferSize) beforeSave() {} +func (x *TCPBufferSize) save(m state.Map) { + x.beforeSave() + m.Save("Min", &x.Min) + m.Save("Default", &x.Default) + m.Save("Max", &x.Max) +} + +func (x *TCPBufferSize) afterLoad() {} +func (x *TCPBufferSize) load(m state.Map) { + m.Load("Min", &x.Min) + m.Load("Default", &x.Default) + m.Load("Max", &x.Max) +} + +func init() { + state.Register("inet.TCPBufferSize", (*TCPBufferSize)(nil), state.Fns{Save: (*TCPBufferSize).save, Load: (*TCPBufferSize).load}) +} diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD deleted file mode 100644 index 41bee9a22..000000000 --- a/pkg/sentry/kernel/BUILD +++ /dev/null @@ -1,239 +0,0 @@ -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") - -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "pending_signals_list", - out = "pending_signals_list.go", - package = "kernel", - prefix = "pendingSignal", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*pendingSignal", - "Linker": "*pendingSignal", - }, -) - -go_template_instance( - name = "process_group_list", - out = "process_group_list.go", - package = "kernel", - prefix = "processGroup", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*ProcessGroup", - "Linker": "*ProcessGroup", - }, -) - -go_template_instance( - name = "seqatomic_taskgoroutineschedinfo", - out = "seqatomic_taskgoroutineschedinfo_unsafe.go", - package = "kernel", - suffix = "TaskGoroutineSchedInfo", - template = "//third_party/gvsync:generic_seqatomic", - types = { - "Value": "TaskGoroutineSchedInfo", - }, -) - -go_template_instance( - name = "session_list", - out = "session_list.go", - package = "kernel", - prefix = "session", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Session", - "Linker": "*Session", - }, -) - -go_template_instance( - name = "task_list", - out = "task_list.go", - package = "kernel", - prefix = "task", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Task", - "Linker": "*Task", - }, -) - -go_template_instance( - name = "socket_list", - out = "socket_list.go", - package = "kernel", - prefix = "socket", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*SocketEntry", - "Linker": "*SocketEntry", - }, -) - -proto_library( - name = "uncaught_signal_proto", - srcs = ["uncaught_signal.proto"], - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_proto"], -) - -go_proto_library( - name = "uncaught_signal_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto", - proto = ":uncaught_signal_proto", - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_go_proto"], -) - -go_library( - name = "kernel", - srcs = [ - "abstract_socket_namespace.go", - "context.go", - "fd_table.go", - "fd_table_unsafe.go", - "fs_context.go", - "ipc_namespace.go", - "kernel.go", - "kernel_state.go", - "pending_signals.go", - "pending_signals_list.go", - "pending_signals_state.go", - "posixtimer.go", - "process_group_list.go", - "ptrace.go", - "ptrace_amd64.go", - "ptrace_arm64.go", - "rseq.go", - "seccomp.go", - "seqatomic_taskgoroutineschedinfo_unsafe.go", - "session_list.go", - "sessions.go", - "signal.go", - "signal_handlers.go", - "socket_list.go", - "syscalls.go", - "syscalls_state.go", - "syslog.go", - "task.go", - "task_acct.go", - "task_block.go", - "task_clone.go", - "task_context.go", - "task_exec.go", - "task_exit.go", - "task_futex.go", - "task_identity.go", - "task_list.go", - "task_log.go", - "task_net.go", - "task_run.go", - "task_sched.go", - "task_signals.go", - "task_start.go", - "task_stop.go", - "task_syscall.go", - "task_usermem.go", - "thread_group.go", - "threads.go", - "timekeeper.go", - "timekeeper_state.go", - "tty.go", - "uts_namespace.go", - "vdso.go", - "version.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel", - imports = [ - "gvisor.dev/gvisor/pkg/bpf", - "gvisor.dev/gvisor/pkg/sentry/device", - "gvisor.dev/gvisor/pkg/tcpip", - ], - visibility = ["//:sandbox"], - deps = [ - ":uncaught_signal_go_proto", - "//pkg/abi", - "//pkg/abi/linux", - "//pkg/amutex", - "//pkg/binary", - "//pkg/bits", - "//pkg/bpf", - "//pkg/cpuid", - "//pkg/eventchannel", - "//pkg/log", - "//pkg/metric", - "//pkg/refs", - "//pkg/secio", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fs/timerfd", - "//pkg/sentry/hostcpu", - "//pkg/sentry/inet", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/epoll", - "//pkg/sentry/kernel/futex", - "//pkg/sentry/kernel/sched", - "//pkg/sentry/kernel/semaphore", - "//pkg/sentry/kernel/shm", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/loader", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/safemem", - "//pkg/sentry/socket/netlink/port", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/time", - "//pkg/sentry/unimpl", - "//pkg/sentry/unimpl:unimplemented_syscall_go_proto", - "//pkg/sentry/uniqueid", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/state", - "//pkg/state/statefile", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/tcpip/stack", - "//pkg/waiter", - "//third_party/gvsync", - ], -) - -go_test( - name = "kernel_test", - size = "small", - srcs = [ - "fd_table_test.go", - "table_test.go", - "task_test.go", - "timekeeper_test.go", - ], - embed = [":kernel"], - deps = [ - "//pkg/abi", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/filetest", - "//pkg/sentry/kernel/sched", - "//pkg/sentry/limits", - "//pkg/sentry/pgalloc", - "//pkg/sentry/time", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/kernel/README.md b/pkg/sentry/kernel/README.md deleted file mode 100644 index 427311be8..000000000 --- a/pkg/sentry/kernel/README.md +++ /dev/null @@ -1,108 +0,0 @@ -This package contains: - -- A (partial) emulation of the "core Linux kernel", which governs task - execution and scheduling, system call dispatch, and signal handling. See - below for details. - -- The top-level interface for the sentry's Linux kernel emulation in general, - used by the `main` function of all versions of the sentry. This interface - revolves around the `Env` type (defined in `kernel.go`). - -# Background - -In Linux, each schedulable context is referred to interchangeably as a "task" or -"thread". Tasks can be divided into userspace and kernel tasks. In the sentry, -scheduling is managed by the Go runtime, so each schedulable context is a -goroutine; only "userspace" (application) contexts are referred to as tasks, and -represented by Task objects. (From this point forward, "task" refers to the -sentry's notion of a task unless otherwise specified.) - -At a high level, Linux application threads can be thought of as repeating a "run -loop": - -- Some amount of application code is executed in userspace. - -- A trap (explicit syscall invocation, hardware interrupt or exception, etc.) - causes control flow to switch to the kernel. - -- Some amount of kernel code is executed in kernelspace, e.g. to handle the - cause of the trap. - -- The kernel "returns from the trap" into application code. - -Analogously, each task in the sentry is associated with a *task goroutine* that -executes that task's run loop (`Task.run` in `task_run.go`). However, the -sentry's task run loop differs in structure in order to support saving execution -state to, and resuming execution from, checkpoints. - -While in kernelspace, a Linux thread can be descheduled (cease execution) in a -variety of ways: - -- It can yield or be preempted, becoming temporarily descheduled but still - runnable. At present, the sentry delegates scheduling of runnable threads to - the Go runtime. - -- It can exit, becoming permanently descheduled. The sentry's equivalent is - returning from `Task.run`, terminating the task goroutine. - -- It can enter interruptible sleep, a state in which it can be woken by a - caller-defined wakeup or the receipt of a signal. In the sentry, - interruptible sleep (which is ambiguously referred to as *blocking*) is - implemented by making all events that can end blocking (including signal - notifications) communicated via Go channels and using `select` to multiplex - wakeup sources; see `task_block.go`. - -- It can enter uninterruptible sleep, a state in which it can only be woken by - a caller-defined wakeup. Killable sleep is a closely related variant in - which the task can also be woken by SIGKILL. (These definitions also include - Linux's "group-stopped" (`TASK_STOPPED`) and "ptrace-stopped" - (`TASK_TRACED`) states.) - -To maximize compatibility with Linux, sentry checkpointing appears as a spurious -signal-delivery interrupt on all tasks; interrupted system calls return `EINTR` -or are automatically restarted as usual. However, these semantics require that -uninterruptible and killable sleeps do not appear to be interrupted. In other -words, the state of the task, including its progress through the interrupted -operation, must be preserved by checkpointing. For many such sleeps, the wakeup -condition is application-controlled, making it infeasible to wait for the sleep -to end before checkpointing. Instead, we must support checkpointing progress -through sleeping operations. - -# Implementation - -We break the task's control flow graph into *states*, delimited by: - -1. Points where uninterruptible and killable sleeps may occur. For example, - there exists a state boundary between signal dequeueing and signal delivery - because there may be an intervening ptrace signal-delivery-stop. - -2. Points where sleep-induced branches may "rejoin" normal execution. For - example, the syscall exit state exists because it can be reached immediately - following a synchronous syscall, or after a task that is sleeping in - `execve()` or `vfork()` resumes execution. - -3. Points containing large branches. This is strictly for organizational - purposes. For example, the state that processes interrupt-signaled - conditions is kept separate from the main "app" state to reduce the size of - the latter. - -4. `SyscallReinvoke`, which does not correspond to anything in Linux, and - exists solely to serve the autosave feature. - -![dot -Tpng -Goverlap=false -orun_states.png run_states.dot](g3doc/run_states.png "Task control flow graph") - -States before which a stop may occur are represented as implementations of the -`taskRunState` interface named `run(state)`, allowing them to be saved and -restored. States that cannot be immediately preceded by a stop are simply `Task` -methods named `do(state)`. - -Conditions that can require task goroutines to cease execution for unknown -lengths of time are called *stops*. Stops are divided into *internal stops*, -which are stops whose start and end conditions are implemented within the -sentry, and *external stops*, which are stops whose start and end conditions are -not known to the sentry. Hence all uninterruptible and killable sleeps are -internal stops, and the existence of a pending checkpoint operation is an -external stop. Internal stops are reified into instances of the `TaskStop` type, -while external stops are merely counted. The task run loop alternates between -checking for stops and advancing the task's state. This allows checkpointing to -hold tasks in a stopped state while waiting for all tasks in the system to stop. diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD deleted file mode 100644 index 1d00a6310..000000000 --- a/pkg/sentry/kernel/auth/BUILD +++ /dev/null @@ -1,69 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") - -go_template_instance( - name = "atomicptr_credentials", - out = "atomicptr_credentials_unsafe.go", - package = "auth", - suffix = "Credentials", - template = "//third_party/gvsync:generic_atomicptr", - types = { - "Value": "Credentials", - }, -) - -go_template_instance( - name = "id_map_range", - out = "id_map_range.go", - package = "auth", - prefix = "idMap", - template = "//pkg/segment:generic_range", - types = { - "T": "uint32", - }, -) - -go_template_instance( - name = "id_map_set", - out = "id_map_set.go", - consts = { - "minDegree": "3", - }, - package = "auth", - prefix = "idMap", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint32", - "Range": "idMapRange", - "Value": "uint32", - "Functions": "idMapFunctions", - }, -) - -go_library( - name = "auth", - srcs = [ - "atomicptr_credentials_unsafe.go", - "auth.go", - "capability_set.go", - "context.go", - "credentials.go", - "id.go", - "id_map.go", - "id_map_functions.go", - "id_map_range.go", - "id_map_set.go", - "user_namespace.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/auth", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/bits", - "//pkg/log", - "//pkg/sentry/context", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go b/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go new file mode 100755 index 000000000..4535c958f --- /dev/null +++ b/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go @@ -0,0 +1,37 @@ +package auth + +import ( + "sync/atomic" + "unsafe" +) + +// An AtomicPtr is a pointer to a value of type Value that can be atomically +// loaded and stored. The zero value of an AtomicPtr represents nil. +// +// Note that copying AtomicPtr by value performs a non-atomic read of the +// stored pointer, which is unsafe if Store() can be called concurrently; in +// this case, do `dst.Store(src.Load())` instead. +// +// +stateify savable +type AtomicPtrCredentials struct { + ptr unsafe.Pointer `state:".(*Credentials)"` +} + +func (p *AtomicPtrCredentials) savePtr() *Credentials { + return p.Load() +} + +func (p *AtomicPtrCredentials) loadPtr(v *Credentials) { + p.Store(v) +} + +// Load returns the value set by the most recent Store. It returns nil if there +// has been no previous call to Store. +func (p *AtomicPtrCredentials) Load() *Credentials { + return (*Credentials)(atomic.LoadPointer(&p.ptr)) +} + +// Store sets the value returned by Load to x. +func (p *AtomicPtrCredentials) Store(x *Credentials) { + atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) +} diff --git a/pkg/sentry/kernel/auth/auth_state_autogen.go b/pkg/sentry/kernel/auth/auth_state_autogen.go new file mode 100755 index 000000000..ef5f8d957 --- /dev/null +++ b/pkg/sentry/kernel/auth/auth_state_autogen.go @@ -0,0 +1,164 @@ +// automatically generated by stateify. + +package auth + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *AtomicPtrCredentials) beforeSave() {} +func (x *AtomicPtrCredentials) save(m state.Map) { + x.beforeSave() + var ptr *Credentials = x.savePtr() + m.SaveValue("ptr", ptr) +} + +func (x *AtomicPtrCredentials) afterLoad() {} +func (x *AtomicPtrCredentials) load(m state.Map) { + m.LoadValue("ptr", new(*Credentials), func(y interface{}) { x.loadPtr(y.(*Credentials)) }) +} + +func (x *Credentials) beforeSave() {} +func (x *Credentials) save(m state.Map) { + x.beforeSave() + m.Save("RealKUID", &x.RealKUID) + m.Save("EffectiveKUID", &x.EffectiveKUID) + m.Save("SavedKUID", &x.SavedKUID) + m.Save("RealKGID", &x.RealKGID) + m.Save("EffectiveKGID", &x.EffectiveKGID) + m.Save("SavedKGID", &x.SavedKGID) + m.Save("ExtraKGIDs", &x.ExtraKGIDs) + m.Save("PermittedCaps", &x.PermittedCaps) + m.Save("InheritableCaps", &x.InheritableCaps) + m.Save("EffectiveCaps", &x.EffectiveCaps) + m.Save("BoundingCaps", &x.BoundingCaps) + m.Save("KeepCaps", &x.KeepCaps) + m.Save("UserNamespace", &x.UserNamespace) +} + +func (x *Credentials) afterLoad() {} +func (x *Credentials) load(m state.Map) { + m.Load("RealKUID", &x.RealKUID) + m.Load("EffectiveKUID", &x.EffectiveKUID) + m.Load("SavedKUID", &x.SavedKUID) + m.Load("RealKGID", &x.RealKGID) + m.Load("EffectiveKGID", &x.EffectiveKGID) + m.Load("SavedKGID", &x.SavedKGID) + m.Load("ExtraKGIDs", &x.ExtraKGIDs) + m.Load("PermittedCaps", &x.PermittedCaps) + m.Load("InheritableCaps", &x.InheritableCaps) + m.Load("EffectiveCaps", &x.EffectiveCaps) + m.Load("BoundingCaps", &x.BoundingCaps) + m.Load("KeepCaps", &x.KeepCaps) + m.Load("UserNamespace", &x.UserNamespace) +} + +func (x *IDMapEntry) beforeSave() {} +func (x *IDMapEntry) save(m state.Map) { + x.beforeSave() + m.Save("FirstID", &x.FirstID) + m.Save("FirstParentID", &x.FirstParentID) + m.Save("Length", &x.Length) +} + +func (x *IDMapEntry) afterLoad() {} +func (x *IDMapEntry) load(m state.Map) { + m.Load("FirstID", &x.FirstID) + m.Load("FirstParentID", &x.FirstParentID) + m.Load("Length", &x.Length) +} + +func (x *idMapRange) beforeSave() {} +func (x *idMapRange) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) +} + +func (x *idMapRange) afterLoad() {} +func (x *idMapRange) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) +} + +func (x *idMapSet) beforeSave() {} +func (x *idMapSet) save(m state.Map) { + x.beforeSave() + var root *idMapSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *idMapSet) afterLoad() {} +func (x *idMapSet) load(m state.Map) { + m.LoadValue("root", new(*idMapSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*idMapSegmentDataSlices)) }) +} + +func (x *idMapnode) beforeSave() {} +func (x *idMapnode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *idMapnode) afterLoad() {} +func (x *idMapnode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *idMapSegmentDataSlices) beforeSave() {} +func (x *idMapSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *idMapSegmentDataSlices) afterLoad() {} +func (x *idMapSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func (x *UserNamespace) beforeSave() {} +func (x *UserNamespace) save(m state.Map) { + x.beforeSave() + m.Save("parent", &x.parent) + m.Save("owner", &x.owner) + m.Save("uidMapFromParent", &x.uidMapFromParent) + m.Save("uidMapToParent", &x.uidMapToParent) + m.Save("gidMapFromParent", &x.gidMapFromParent) + m.Save("gidMapToParent", &x.gidMapToParent) +} + +func (x *UserNamespace) afterLoad() {} +func (x *UserNamespace) load(m state.Map) { + m.Load("parent", &x.parent) + m.Load("owner", &x.owner) + m.Load("uidMapFromParent", &x.uidMapFromParent) + m.Load("uidMapToParent", &x.uidMapToParent) + m.Load("gidMapFromParent", &x.gidMapFromParent) + m.Load("gidMapToParent", &x.gidMapToParent) +} + +func init() { + state.Register("auth.AtomicPtrCredentials", (*AtomicPtrCredentials)(nil), state.Fns{Save: (*AtomicPtrCredentials).save, Load: (*AtomicPtrCredentials).load}) + state.Register("auth.Credentials", (*Credentials)(nil), state.Fns{Save: (*Credentials).save, Load: (*Credentials).load}) + state.Register("auth.IDMapEntry", (*IDMapEntry)(nil), state.Fns{Save: (*IDMapEntry).save, Load: (*IDMapEntry).load}) + state.Register("auth.idMapRange", (*idMapRange)(nil), state.Fns{Save: (*idMapRange).save, Load: (*idMapRange).load}) + state.Register("auth.idMapSet", (*idMapSet)(nil), state.Fns{Save: (*idMapSet).save, Load: (*idMapSet).load}) + state.Register("auth.idMapnode", (*idMapnode)(nil), state.Fns{Save: (*idMapnode).save, Load: (*idMapnode).load}) + state.Register("auth.idMapSegmentDataSlices", (*idMapSegmentDataSlices)(nil), state.Fns{Save: (*idMapSegmentDataSlices).save, Load: (*idMapSegmentDataSlices).load}) + state.Register("auth.UserNamespace", (*UserNamespace)(nil), state.Fns{Save: (*UserNamespace).save, Load: (*UserNamespace).load}) +} diff --git a/pkg/sentry/kernel/auth/id_map_range.go b/pkg/sentry/kernel/auth/id_map_range.go new file mode 100755 index 000000000..833fa3518 --- /dev/null +++ b/pkg/sentry/kernel/auth/id_map_range.go @@ -0,0 +1,62 @@ +package auth + +// A Range represents a contiguous range of T. +// +// +stateify savable +type idMapRange struct { + // Start is the inclusive start of the range. + Start uint32 + + // End is the exclusive end of the range. + End uint32 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +func (r idMapRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +func (r idMapRange) Length() uint32 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +func (r idMapRange) Contains(x uint32) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +func (r idMapRange) Overlaps(r2 idMapRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +func (r idMapRange) IsSupersetOf(r2 idMapRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +func (r idMapRange) Intersect(r2 idMapRange) idMapRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +func (r idMapRange) CanSplitAt(x uint32) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/kernel/auth/id_map_set.go b/pkg/sentry/kernel/auth/id_map_set.go new file mode 100755 index 000000000..73a17f281 --- /dev/null +++ b/pkg/sentry/kernel/auth/id_map_set.go @@ -0,0 +1,1270 @@ +package auth + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + idMapminDegree = 3 + + idMapmaxDegree = 2 * idMapminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type idMapSet struct { + root idMapnode `state:".(*idMapSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *idMapSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *idMapSet) IsEmptyRange(r idMapRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *idMapSet) Span() uint32 { + var sz uint32 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *idMapSet) SpanRange(r idMapRange) uint32 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint32 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *idMapSet) FirstSegment() idMapIterator { + if s.root.nrSegments == 0 { + return idMapIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *idMapSet) LastSegment() idMapIterator { + if s.root.nrSegments == 0 { + return idMapIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *idMapSet) FirstGap() idMapGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return idMapGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *idMapSet) LastGap() idMapGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return idMapGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *idMapSet) Find(key uint32) (idMapIterator, idMapGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return idMapIterator{n, i}, idMapGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return idMapIterator{}, idMapGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *idMapSet) FindSegment(key uint32) idMapIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *idMapSet) LowerBoundSegment(min uint32) idMapIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *idMapSet) UpperBoundSegment(max uint32) idMapIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *idMapSet) FindGap(key uint32) idMapGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *idMapSet) LowerBoundGap(min uint32) idMapGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *idMapSet) UpperBoundGap(max uint32) idMapGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *idMapSet) Add(r idMapRange, val uint32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *idMapSet) AddWithoutMerging(r idMapRange, val uint32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *idMapSet) Insert(gap idMapGapIterator, r idMapRange, val uint32) idMapIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (idMapFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (idMapFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (idMapFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *idMapSet) InsertWithoutMerging(gap idMapGapIterator, r idMapRange, val uint32) idMapIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *idMapSet) InsertWithoutMergingUnchecked(gap idMapGapIterator, r idMapRange, val uint32) idMapIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return idMapIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *idMapSet) Remove(seg idMapIterator) idMapGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + idMapFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(idMapGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *idMapSet) RemoveAll() { + s.root = idMapnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *idMapSet) RemoveRange(r idMapRange) idMapGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *idMapSet) Merge(first, second idMapIterator) idMapIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *idMapSet) MergeUnchecked(first, second idMapIterator) idMapIterator { + if first.End() == second.Start() { + if mval, ok := (idMapFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return idMapIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *idMapSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *idMapSet) MergeRange(r idMapRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *idMapSet) MergeAdjacent(r idMapRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *idMapSet) Split(seg idMapIterator, split uint32) (idMapIterator, idMapIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *idMapSet) SplitUnchecked(seg idMapIterator, split uint32) (idMapIterator, idMapIterator) { + val1, val2 := (idMapFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), idMapRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *idMapSet) SplitAt(split uint32) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *idMapSet) Isolate(seg idMapIterator, r idMapRange) idMapIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *idMapSet) ApplyContiguous(r idMapRange, fn func(seg idMapIterator)) idMapGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return idMapGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return idMapGapIterator{} + } + } +} + +// +stateify savable +type idMapnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *idMapnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [idMapmaxDegree - 1]idMapRange + values [idMapmaxDegree - 1]uint32 + children [idMapmaxDegree]*idMapnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *idMapnode) firstSegment() idMapIterator { + for n.hasChildren { + n = n.children[0] + } + return idMapIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *idMapnode) lastSegment() idMapIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return idMapIterator{n, n.nrSegments - 1} +} + +func (n *idMapnode) prevSibling() *idMapnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *idMapnode) nextSibling() *idMapnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *idMapnode) rebalanceBeforeInsert(gap idMapGapIterator) idMapGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < idMapmaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &idMapnode{ + nrSegments: idMapminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &idMapnode{ + nrSegments: idMapminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:idMapminDegree-1], n.keys[:idMapminDegree-1]) + copy(left.values[:idMapminDegree-1], n.values[:idMapminDegree-1]) + copy(right.keys[:idMapminDegree-1], n.keys[idMapminDegree:]) + copy(right.values[:idMapminDegree-1], n.values[idMapminDegree:]) + n.keys[0], n.values[0] = n.keys[idMapminDegree-1], n.values[idMapminDegree-1] + idMapzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:idMapminDegree], n.children[:idMapminDegree]) + copy(right.children[:idMapminDegree], n.children[idMapminDegree:]) + idMapzeroNodeSlice(n.children[2:]) + for i := 0; i < idMapminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < idMapminDegree { + return idMapGapIterator{left, gap.index} + } + return idMapGapIterator{right, gap.index - idMapminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[idMapminDegree-1], n.values[idMapminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &idMapnode{ + nrSegments: idMapminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:idMapminDegree-1], n.keys[idMapminDegree:]) + copy(sibling.values[:idMapminDegree-1], n.values[idMapminDegree:]) + idMapzeroValueSlice(n.values[idMapminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:idMapminDegree], n.children[idMapminDegree:]) + idMapzeroNodeSlice(n.children[idMapminDegree:]) + for i := 0; i < idMapminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = idMapminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < idMapminDegree { + return gap + } + return idMapGapIterator{sibling, gap.index - idMapminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *idMapnode) rebalanceAfterRemove(gap idMapGapIterator) idMapGapIterator { + for { + if n.nrSegments >= idMapminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= idMapminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + idMapFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return idMapGapIterator{n, 0} + } + if gap.node == n { + return idMapGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= idMapminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + idMapFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return idMapGapIterator{n, n.nrSegments} + } + return idMapGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return idMapGapIterator{p, gap.index} + } + if gap.node == right { + return idMapGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *idMapnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = idMapGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + idMapFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type idMapIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *idMapnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg idMapIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg idMapIterator) Range() idMapRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg idMapIterator) Start() uint32 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg idMapIterator) End() uint32 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg idMapIterator) SetRangeUnchecked(r idMapRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg idMapIterator) SetRange(r idMapRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg idMapIterator) SetStartUnchecked(start uint32) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg idMapIterator) SetStart(start uint32) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg idMapIterator) SetEndUnchecked(end uint32) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg idMapIterator) SetEnd(end uint32) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg idMapIterator) Value() uint32 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg idMapIterator) ValuePtr() *uint32 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg idMapIterator) SetValue(val uint32) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg idMapIterator) PrevSegment() idMapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return idMapIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return idMapIterator{} + } + return idMapsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg idMapIterator) NextSegment() idMapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return idMapIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return idMapIterator{} + } + return idMapsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg idMapIterator) PrevGap() idMapGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return idMapGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg idMapIterator) NextGap() idMapGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return idMapGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg idMapIterator) PrevNonEmpty() (idMapIterator, idMapGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return idMapIterator{}, gap + } + return gap.PrevSegment(), idMapGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg idMapIterator) NextNonEmpty() (idMapIterator, idMapGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return idMapIterator{}, gap + } + return gap.NextSegment(), idMapGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type idMapGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *idMapnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap idMapGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap idMapGapIterator) Range() idMapRange { + return idMapRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap idMapGapIterator) Start() uint32 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return idMapFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap idMapGapIterator) End() uint32 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return idMapFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap idMapGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap idMapGapIterator) PrevSegment() idMapIterator { + return idMapsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap idMapGapIterator) NextSegment() idMapIterator { + return idMapsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap idMapGapIterator) PrevGap() idMapGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return idMapGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap idMapGapIterator) NextGap() idMapGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return idMapGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func idMapsegmentBeforePosition(n *idMapnode, i int) idMapIterator { + for i == 0 { + if n.parent == nil { + return idMapIterator{} + } + n, i = n.parent, n.parentIndex + } + return idMapIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func idMapsegmentAfterPosition(n *idMapnode, i int) idMapIterator { + for i == n.nrSegments { + if n.parent == nil { + return idMapIterator{} + } + n, i = n.parent, n.parentIndex + } + return idMapIterator{n, i} +} + +func idMapzeroValueSlice(slice []uint32) { + + for i := range slice { + idMapFunctions{}.ClearValue(&slice[i]) + } +} + +func idMapzeroNodeSlice(slice []*idMapnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *idMapSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *idMapnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *idMapnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type idMapSegmentDataSlices struct { + Start []uint32 + End []uint32 + Values []uint32 +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *idMapSet) ExportSortedSlices() *idMapSegmentDataSlices { + var sds idMapSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *idMapSet) ImportSortedSlices(sds *idMapSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := idMapRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *idMapSet) saveRoot() *idMapSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *idMapSet) loadRoot(sds *idMapSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/kernel/contexttest/BUILD b/pkg/sentry/kernel/contexttest/BUILD deleted file mode 100644 index bec13a3d9..000000000 --- a/pkg/sentry/kernel/contexttest/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "contexttest", - testonly = 1, - srcs = ["contexttest.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/kernel", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - ], -) diff --git a/pkg/sentry/kernel/contexttest/contexttest.go b/pkg/sentry/kernel/contexttest/contexttest.go deleted file mode 100644 index 82f9d8922..000000000 --- a/pkg/sentry/kernel/contexttest/contexttest.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package contexttest provides a test context.Context which includes -// a dummy kernel pointing to a valid platform. -package contexttest - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" -) - -// Context returns a Context that may be used in tests. Uses ptrace as the -// platform.Platform, and provides a stub kernel that only serves to point to -// the platform. -func Context(tb testing.TB) context.Context { - ctx := contexttest.Context(tb) - k := &kernel.Kernel{ - Platform: platform.FromContext(ctx), - } - k.SetMemoryFile(pgalloc.MemoryFileFromContext(ctx)) - ctx.(*contexttest.TestContext).RegisterValue(kernel.CtxKernel, k) - return ctx -} diff --git a/pkg/sentry/kernel/epoll/BUILD b/pkg/sentry/kernel/epoll/BUILD deleted file mode 100644 index f46c43128..000000000 --- a/pkg/sentry/kernel/epoll/BUILD +++ /dev/null @@ -1,50 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "epoll_list", - out = "epoll_list.go", - package = "epoll", - prefix = "pollEntry", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*pollEntry", - "Linker": "*pollEntry", - }, -) - -go_library( - name = "epoll", - srcs = [ - "epoll.go", - "epoll_list.go", - "epoll_state.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/epoll", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/refs", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "epoll_test", - size = "small", - srcs = [ - "epoll_test.go", - ], - embed = [":epoll"], - deps = [ - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs/filetest", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/epoll/epoll_list.go b/pkg/sentry/kernel/epoll/epoll_list.go new file mode 100755 index 000000000..94d5c9e57 --- /dev/null +++ b/pkg/sentry/kernel/epoll/epoll_list.go @@ -0,0 +1,173 @@ +package epoll + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type pollEntryElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (pollEntryElementMapper) linkerFor(elem *pollEntry) *pollEntry { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type pollEntryList struct { + head *pollEntry + tail *pollEntry +} + +// Reset resets list l to the empty state. +func (l *pollEntryList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *pollEntryList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *pollEntryList) Front() *pollEntry { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *pollEntryList) Back() *pollEntry { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *pollEntryList) PushFront(e *pollEntry) { + pollEntryElementMapper{}.linkerFor(e).SetNext(l.head) + pollEntryElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + pollEntryElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *pollEntryList) PushBack(e *pollEntry) { + pollEntryElementMapper{}.linkerFor(e).SetNext(nil) + pollEntryElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + pollEntryElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *pollEntryList) PushBackList(m *pollEntryList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + pollEntryElementMapper{}.linkerFor(l.tail).SetNext(m.head) + pollEntryElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *pollEntryList) InsertAfter(b, e *pollEntry) { + a := pollEntryElementMapper{}.linkerFor(b).Next() + pollEntryElementMapper{}.linkerFor(e).SetNext(a) + pollEntryElementMapper{}.linkerFor(e).SetPrev(b) + pollEntryElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + pollEntryElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *pollEntryList) InsertBefore(a, e *pollEntry) { + b := pollEntryElementMapper{}.linkerFor(a).Prev() + pollEntryElementMapper{}.linkerFor(e).SetNext(a) + pollEntryElementMapper{}.linkerFor(e).SetPrev(b) + pollEntryElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + pollEntryElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *pollEntryList) Remove(e *pollEntry) { + prev := pollEntryElementMapper{}.linkerFor(e).Prev() + next := pollEntryElementMapper{}.linkerFor(e).Next() + + if prev != nil { + pollEntryElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + pollEntryElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type pollEntryEntry struct { + next *pollEntry + prev *pollEntry +} + +// Next returns the entry that follows e in the list. +func (e *pollEntryEntry) Next() *pollEntry { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *pollEntryEntry) Prev() *pollEntry { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *pollEntryEntry) SetNext(elem *pollEntry) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *pollEntryEntry) SetPrev(elem *pollEntry) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/epoll/epoll_state_autogen.go b/pkg/sentry/kernel/epoll/epoll_state_autogen.go new file mode 100755 index 000000000..c738a9355 --- /dev/null +++ b/pkg/sentry/kernel/epoll/epoll_state_autogen.go @@ -0,0 +1,99 @@ +// automatically generated by stateify. + +package epoll + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *FileIdentifier) beforeSave() {} +func (x *FileIdentifier) save(m state.Map) { + x.beforeSave() + m.Save("File", &x.File) + m.Save("Fd", &x.Fd) +} + +func (x *FileIdentifier) afterLoad() {} +func (x *FileIdentifier) load(m state.Map) { + m.LoadWait("File", &x.File) + m.Load("Fd", &x.Fd) +} + +func (x *pollEntry) beforeSave() {} +func (x *pollEntry) save(m state.Map) { + x.beforeSave() + m.Save("pollEntryEntry", &x.pollEntryEntry) + m.Save("id", &x.id) + m.Save("userData", &x.userData) + m.Save("mask", &x.mask) + m.Save("flags", &x.flags) + m.Save("epoll", &x.epoll) +} + +func (x *pollEntry) load(m state.Map) { + m.Load("pollEntryEntry", &x.pollEntryEntry) + m.LoadWait("id", &x.id) + m.Load("userData", &x.userData) + m.Load("mask", &x.mask) + m.Load("flags", &x.flags) + m.Load("epoll", &x.epoll) + m.AfterLoad(x.afterLoad) +} + +func (x *EventPoll) beforeSave() {} +func (x *EventPoll) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.FilePipeSeek) { m.Failf("FilePipeSeek is %v, expected zero", x.FilePipeSeek) } + if !state.IsZeroValue(x.FileNotDirReaddir) { m.Failf("FileNotDirReaddir is %v, expected zero", x.FileNotDirReaddir) } + if !state.IsZeroValue(x.FileNoFsync) { m.Failf("FileNoFsync is %v, expected zero", x.FileNoFsync) } + if !state.IsZeroValue(x.FileNoopFlush) { m.Failf("FileNoopFlush is %v, expected zero", x.FileNoopFlush) } + if !state.IsZeroValue(x.FileNoIoctl) { m.Failf("FileNoIoctl is %v, expected zero", x.FileNoIoctl) } + if !state.IsZeroValue(x.FileNoMMap) { m.Failf("FileNoMMap is %v, expected zero", x.FileNoMMap) } + if !state.IsZeroValue(x.Queue) { m.Failf("Queue is %v, expected zero", x.Queue) } + m.Save("files", &x.files) + m.Save("readyList", &x.readyList) + m.Save("waitingList", &x.waitingList) + m.Save("disabledList", &x.disabledList) +} + +func (x *EventPoll) load(m state.Map) { + m.Load("files", &x.files) + m.Load("readyList", &x.readyList) + m.Load("waitingList", &x.waitingList) + m.Load("disabledList", &x.disabledList) + m.AfterLoad(x.afterLoad) +} + +func (x *pollEntryList) beforeSave() {} +func (x *pollEntryList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *pollEntryList) afterLoad() {} +func (x *pollEntryList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *pollEntryEntry) beforeSave() {} +func (x *pollEntryEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *pollEntryEntry) afterLoad() {} +func (x *pollEntryEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func init() { + state.Register("epoll.FileIdentifier", (*FileIdentifier)(nil), state.Fns{Save: (*FileIdentifier).save, Load: (*FileIdentifier).load}) + state.Register("epoll.pollEntry", (*pollEntry)(nil), state.Fns{Save: (*pollEntry).save, Load: (*pollEntry).load}) + state.Register("epoll.EventPoll", (*EventPoll)(nil), state.Fns{Save: (*EventPoll).save, Load: (*EventPoll).load}) + state.Register("epoll.pollEntryList", (*pollEntryList)(nil), state.Fns{Save: (*pollEntryList).save, Load: (*pollEntryList).load}) + state.Register("epoll.pollEntryEntry", (*pollEntryEntry)(nil), state.Fns{Save: (*pollEntryEntry).save, Load: (*pollEntryEntry).load}) +} diff --git a/pkg/sentry/kernel/epoll/epoll_test.go b/pkg/sentry/kernel/epoll/epoll_test.go deleted file mode 100644 index 4a20d4c82..000000000 --- a/pkg/sentry/kernel/epoll/epoll_test.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package epoll - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs/filetest" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestFileDestroyed(t *testing.T) { - f := filetest.NewTestFile(t) - id := FileIdentifier{f, 12} - - efile := NewEventPoll(contexttest.Context(t)) - e := efile.FileOperations.(*EventPoll) - if err := e.AddEntry(id, 0, waiter.EventIn, [2]int32{}); err != nil { - t.Fatalf("addEntry failed: %v", err) - } - - // Check that we get an event reported twice in a row. - evt := e.ReadEvents(1) - if len(evt) != 1 { - t.Fatalf("Unexpected number of ready events: want %v, got %v", 1, len(evt)) - } - - evt = e.ReadEvents(1) - if len(evt) != 1 { - t.Fatalf("Unexpected number of ready events: want %v, got %v", 1, len(evt)) - } - - // Destroy the file. Check that we get no more events. - f.DecRef() - - evt = e.ReadEvents(1) - if len(evt) != 0 { - t.Fatalf("Unexpected number of ready events: want %v, got %v", 0, len(evt)) - } - -} diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD deleted file mode 100644 index 1c5f979d4..000000000 --- a/pkg/sentry/kernel/eventfd/BUILD +++ /dev/null @@ -1,33 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "eventfd", - srcs = ["eventfd.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/fdnotifier", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "eventfd_test", - size = "small", - srcs = ["eventfd_test.go"], - embed = [":eventfd"], - deps = [ - "//pkg/sentry/context/contexttest", - "//pkg/sentry/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go b/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go new file mode 100755 index 000000000..f6e94b521 --- /dev/null +++ b/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go @@ -0,0 +1,27 @@ +// automatically generated by stateify. + +package eventfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *EventOperations) beforeSave() {} +func (x *EventOperations) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.wq) { m.Failf("wq is %v, expected zero", x.wq) } + m.Save("val", &x.val) + m.Save("semMode", &x.semMode) + m.Save("hostfd", &x.hostfd) +} + +func (x *EventOperations) afterLoad() {} +func (x *EventOperations) load(m state.Map) { + m.Load("val", &x.val) + m.Load("semMode", &x.semMode) + m.Load("hostfd", &x.hostfd) +} + +func init() { + state.Register("eventfd.EventOperations", (*EventOperations)(nil), state.Fns{Save: (*EventOperations).save, Load: (*EventOperations).load}) +} diff --git a/pkg/sentry/kernel/eventfd/eventfd_test.go b/pkg/sentry/kernel/eventfd/eventfd_test.go deleted file mode 100644 index 018c7f3ef..000000000 --- a/pkg/sentry/kernel/eventfd/eventfd_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package eventfd - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestEventfd(t *testing.T) { - initVals := []uint64{ - 0, - // Using a non-zero initial value verifies that writing to an - // eventfd signals when the eventfd's counter was already - // non-zero. - 343, - } - - for _, initVal := range initVals { - ctx := contexttest.Context(t) - - // Make a new event that is writable. - event := New(ctx, initVal, false) - - // Register a callback for a write event. - w, ch := waiter.NewChannelEntry(nil) - event.EventRegister(&w, waiter.EventIn) - defer event.EventUnregister(&w) - - data := []byte("00000124") - // Create and submit a write request. - n, err := event.Writev(ctx, usermem.BytesIOSequence(data)) - if err != nil { - t.Fatal(err) - } - if n != 8 { - t.Errorf("eventfd.write wrote %d bytes, not full int64", n) - } - - // Check if the callback fired due to the write event. - select { - case <-ch: - default: - t.Errorf("Didn't get notified of EventIn after write") - } - } -} - -func TestEventfdStat(t *testing.T) { - ctx := contexttest.Context(t) - - // Make a new event that is writable. - event := New(ctx, 0, false) - - // Create and submit an stat request. - uattr, err := event.Dirent.Inode.UnstableAttr(ctx) - if err != nil { - t.Fatalf("eventfd stat request failed: %v", err) - } - if uattr.Size != 0 { - t.Fatal("EventFD size should be 0") - } -} diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD deleted file mode 100644 index 5eddca115..000000000 --- a/pkg/sentry/kernel/fasync/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "fasync", - srcs = ["fasync.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/fasync", - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/fs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/fasync/fasync_state_autogen.go b/pkg/sentry/kernel/fasync/fasync_state_autogen.go new file mode 100755 index 000000000..c17daa8a1 --- /dev/null +++ b/pkg/sentry/kernel/fasync/fasync_state_autogen.go @@ -0,0 +1,32 @@ +// automatically generated by stateify. + +package fasync + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *FileAsync) beforeSave() {} +func (x *FileAsync) save(m state.Map) { + x.beforeSave() + m.Save("e", &x.e) + m.Save("requester", &x.requester) + m.Save("registered", &x.registered) + m.Save("recipientPG", &x.recipientPG) + m.Save("recipientTG", &x.recipientTG) + m.Save("recipientT", &x.recipientT) +} + +func (x *FileAsync) afterLoad() {} +func (x *FileAsync) load(m state.Map) { + m.Load("e", &x.e) + m.Load("requester", &x.requester) + m.Load("registered", &x.registered) + m.Load("recipientPG", &x.recipientPG) + m.Load("recipientTG", &x.recipientTG) + m.Load("recipientT", &x.recipientT) +} + +func init() { + state.Register("fasync.FileAsync", (*FileAsync)(nil), state.Fns{Save: (*FileAsync).save, Load: (*FileAsync).load}) +} diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go deleted file mode 100644 index 2413788e7..000000000 --- a/pkg/sentry/kernel/fd_table_test.go +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright 2018 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "runtime" - "sync" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/filetest" - "gvisor.dev/gvisor/pkg/sentry/limits" -) - -const ( - // maxFD is the maximum FD to try to create in the map. - // - // This number of open files has been seen in the wild. - maxFD = 2 * 1024 -) - -func runTest(t testing.TB, fn func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet)) { - t.Helper() // Don't show in stacks. - - // Create the limits and context. - limitSet := limits.NewLimitSet() - limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true) - ctx := contexttest.WithLimitSet(contexttest.Context(t), limitSet) - - // Create a test file.; - file := filetest.NewTestFile(t) - - // Create the table. - fdTable := new(FDTable) - fdTable.init() - - // Run the test. - fn(ctx, fdTable, file, limitSet) -} - -// TestFDTableMany allocates maxFD FDs, i.e. maxes out the FDTable, until there -// is no room, then makes sure that NewFDAt works and also that if we remove -// one and add one that works too. -func TestFDTableMany(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - for i := 0; i < maxFD; i++ { - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Allocated %v FDs but wanted to allocate %v", i, maxFD) - } - } - - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil { - t.Fatalf("fdTable.NewFDs(0, r) in full map: got nil, wanted error") - } - - if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil { - t.Fatalf("fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) - } - }) -} - -// TestFDTable does a set of simple tests to make sure simple adds, removes, -// GetRefs, and DecRefs work. The ordering is just weird enough that a -// table-driven approach seemed clumsy. -func TestFDTable(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet) { - // Cap the limit at one. - limitSet.Set(limits.NumberOfFiles, limits.Limit{1, maxFD}, true) - - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Adding an FD to an empty 1-size map: got %v, want nil", err) - } - - if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil { - t.Fatalf("Adding an FD to a filled 1-size map: got nil, wanted an error") - } - - // Remove the previous limit. - limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true) - - if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { - t.Fatalf("Adding an FD to a resized map: got %v, want nil", err) - } else if len(fds) != 1 || fds[0] != 1 { - t.Fatalf("Added an FD to a resized map: got %v, want {1}", fds) - } - - if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil { - t.Fatalf("Replacing FD 1 via fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) - } - - if err := fdTable.NewFDAt(ctx, maxFD+1, file, FDFlags{}); err == nil { - t.Fatalf("Using an FD that was too large via fdTable.NewFDAt(%v, r, FDFlags{}): got nil, wanted an error", maxFD+1) - } - - if ref, _ := fdTable.Get(1); ref == nil { - t.Fatalf("fdTable.Get(1): got nil, wanted %v", file) - } - - if ref, _ := fdTable.Get(2); ref != nil { - t.Fatalf("fdTable.Get(2): got a %v, wanted nil", ref) - } - - ref := fdTable.Remove(1) - if ref == nil { - t.Fatalf("fdTable.Remove(1) for an existing FD: failed, want success") - } - ref.DecRef() - - if ref := fdTable.Remove(1); ref != nil { - t.Fatalf("r.Remove(1) for a removed FD: got success, want failure") - } - }) -} - -func TestDescriptorFlags(t *testing.T) { - runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - if err := fdTable.NewFDAt(ctx, 2, file, FDFlags{CloseOnExec: true}); err != nil { - t.Fatalf("fdTable.NewFDAt(2, r, FDFlags{}): got %v, wanted nil", err) - } - - newFile, flags := fdTable.Get(2) - if newFile == nil { - t.Fatalf("fdTable.Get(2): got a %v, wanted nil", newFile) - } - - if !flags.CloseOnExec { - t.Fatalf("new File flags %v don't match original %d\n", flags, 0) - } - }) -} - -func BenchmarkFDLookupAndDecRef(b *testing.B) { - b.StopTimer() // Setup. - - runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file, file, file, file, file}, FDFlags{}) - if err != nil { - b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err) - } - - b.StartTimer() // Benchmark. - for i := 0; i < b.N; i++ { - tf, _ := fdTable.Get(fds[i%len(fds)]) - tf.DecRef() - } - }) -} - -func BenchmarkFDLookupAndDecRefConcurrent(b *testing.B) { - b.StopTimer() // Setup. - - runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { - fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file, file, file, file, file}, FDFlags{}) - if err != nil { - b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err) - } - - concurrency := runtime.GOMAXPROCS(0) - if concurrency < 4 { - concurrency = 4 - } - each := b.N / concurrency - - b.StartTimer() // Benchmark. - var wg sync.WaitGroup - for i := 0; i < concurrency; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for i := 0; i < each; i++ { - tf, _ := fdTable.Get(fds[i%len(fds)]) - tf.DecRef() - } - }() - } - wg.Wait() - }) -} diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD deleted file mode 100644 index 6a31dc044..000000000 --- a/pkg/sentry/kernel/futex/BUILD +++ /dev/null @@ -1,54 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "atomicptr_bucket", - out = "atomicptr_bucket_unsafe.go", - package = "futex", - suffix = "Bucket", - template = "//third_party/gvsync:generic_atomicptr", - types = { - "Value": "bucket", - }, -) - -go_template_instance( - name = "waiter_list", - out = "waiter_list.go", - package = "futex", - prefix = "waiter", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*Waiter", - "Linker": "*Waiter", - }, -) - -go_library( - name = "futex", - srcs = [ - "atomicptr_bucket_unsafe.go", - "futex.go", - "waiter_list.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/futex", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/log", - "//pkg/sentry/context", - "//pkg/sentry/memmap", - "//pkg/sentry/usermem", - "//pkg/syserror", - ], -) - -go_test( - name = "futex_test", - size = "small", - srcs = ["futex_test.go"], - embed = [":futex"], - deps = ["//pkg/sentry/usermem"], -) diff --git a/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go b/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go new file mode 100755 index 000000000..d3fdf09b0 --- /dev/null +++ b/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go @@ -0,0 +1,37 @@ +package futex + +import ( + "sync/atomic" + "unsafe" +) + +// An AtomicPtr is a pointer to a value of type Value that can be atomically +// loaded and stored. The zero value of an AtomicPtr represents nil. +// +// Note that copying AtomicPtr by value performs a non-atomic read of the +// stored pointer, which is unsafe if Store() can be called concurrently; in +// this case, do `dst.Store(src.Load())` instead. +// +// +stateify savable +type AtomicPtrBucket struct { + ptr unsafe.Pointer `state:".(*bucket)"` +} + +func (p *AtomicPtrBucket) savePtr() *bucket { + return p.Load() +} + +func (p *AtomicPtrBucket) loadPtr(v *bucket) { + p.Store(v) +} + +// Load returns the value set by the most recent Store. It returns nil if there +// has been no previous call to Store. +func (p *AtomicPtrBucket) Load() *bucket { + return (*bucket)(atomic.LoadPointer(&p.ptr)) +} + +// Store sets the value returned by Load to x. +func (p *AtomicPtrBucket) Store(x *bucket) { + atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) +} diff --git a/pkg/sentry/kernel/futex/futex_state_autogen.go b/pkg/sentry/kernel/futex/futex_state_autogen.go new file mode 100755 index 000000000..b2ff39f17 --- /dev/null +++ b/pkg/sentry/kernel/futex/futex_state_autogen.go @@ -0,0 +1,75 @@ +// automatically generated by stateify. + +package futex + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *AtomicPtrBucket) beforeSave() {} +func (x *AtomicPtrBucket) save(m state.Map) { + x.beforeSave() + var ptr *bucket = x.savePtr() + m.SaveValue("ptr", ptr) +} + +func (x *AtomicPtrBucket) afterLoad() {} +func (x *AtomicPtrBucket) load(m state.Map) { + m.LoadValue("ptr", new(*bucket), func(y interface{}) { x.loadPtr(y.(*bucket)) }) +} + +func (x *bucket) beforeSave() {} +func (x *bucket) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.waiters) { m.Failf("waiters is %v, expected zero", x.waiters) } +} + +func (x *bucket) afterLoad() {} +func (x *bucket) load(m state.Map) { +} + +func (x *Manager) beforeSave() {} +func (x *Manager) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.privateBuckets) { m.Failf("privateBuckets is %v, expected zero", x.privateBuckets) } + m.Save("sharedBucket", &x.sharedBucket) +} + +func (x *Manager) afterLoad() {} +func (x *Manager) load(m state.Map) { + m.Load("sharedBucket", &x.sharedBucket) +} + +func (x *waiterList) beforeSave() {} +func (x *waiterList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *waiterList) afterLoad() {} +func (x *waiterList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *waiterEntry) beforeSave() {} +func (x *waiterEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *waiterEntry) afterLoad() {} +func (x *waiterEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func init() { + state.Register("futex.AtomicPtrBucket", (*AtomicPtrBucket)(nil), state.Fns{Save: (*AtomicPtrBucket).save, Load: (*AtomicPtrBucket).load}) + state.Register("futex.bucket", (*bucket)(nil), state.Fns{Save: (*bucket).save, Load: (*bucket).load}) + state.Register("futex.Manager", (*Manager)(nil), state.Fns{Save: (*Manager).save, Load: (*Manager).load}) + state.Register("futex.waiterList", (*waiterList)(nil), state.Fns{Save: (*waiterList).save, Load: (*waiterList).load}) + state.Register("futex.waiterEntry", (*waiterEntry)(nil), state.Fns{Save: (*waiterEntry).save, Load: (*waiterEntry).load}) +} diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go deleted file mode 100644 index 65e5d1428..000000000 --- a/pkg/sentry/kernel/futex/futex_test.go +++ /dev/null @@ -1,530 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package futex - -import ( - "math" - "runtime" - "sync" - "sync/atomic" - "syscall" - "testing" - "unsafe" - - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -// testData implements the Target interface, and allows us to -// treat the address passed for futex operations as an index in -// a byte slice for testing simplicity. -type testData []byte - -const sizeofInt32 = 4 - -func newTestData(size uint) testData { - return make([]byte, size) -} - -func (t testData) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) { - val := atomic.SwapUint32((*uint32)(unsafe.Pointer(&t[addr])), new) - return val, nil -} - -func (t testData) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) { - if atomic.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&t[addr])), old, new) { - return old, nil - } - return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t[addr]))), nil -} - -func (t testData) LoadUint32(addr usermem.Addr) (uint32, error) { - return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t[addr]))), nil -} - -func (t testData) GetSharedKey(addr usermem.Addr) (Key, error) { - return Key{ - Kind: KindSharedMappable, - Offset: uint64(addr), - }, nil -} - -func futexKind(private bool) string { - if private { - return "private" - } - return "shared" -} - -func newPreparedTestWaiter(t *testing.T, m *Manager, ta Target, addr usermem.Addr, private bool, val uint32, bitmask uint32) *Waiter { - w := NewWaiter() - if err := m.WaitPrepare(w, ta, addr, private, val, bitmask); err != nil { - t.Fatalf("WaitPrepare failed: %v", err) - } - return w -} - -func TestFutexWake(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(sizeofInt32) - - // Start waiting for wakeup. - w := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w) - - // Perform a wakeup. - if n, err := m.Wake(d, 0, private, ^uint32(0), 1); err != nil || n != 1 { - t.Errorf("Wake: got (%d, %v), wanted (1, nil)", n, err) - } - - // Expect the waiter to have been woken. - if !w.woken() { - t.Error("waiter not woken") - } - }) - } -} - -func TestFutexWakeBitmask(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(sizeofInt32) - - // Start waiting for wakeup. - w := newPreparedTestWaiter(t, m, d, 0, private, 0, 0x0000ffff) - defer m.WaitComplete(w) - - // Perform a wakeup using the wrong bitmask. - if n, err := m.Wake(d, 0, private, 0xffff0000, 1); err != nil || n != 0 { - t.Errorf("Wake with non-matching bitmask: got (%d, %v), wanted (0, nil)", n, err) - } - - // Expect the waiter to still be waiting. - if w.woken() { - t.Error("waiter woken unexpectedly") - } - - // Perform a wakeup using the right bitmask. - if n, err := m.Wake(d, 0, private, 0x00000001, 1); err != nil || n != 1 { - t.Errorf("Wake with matching bitmask: got (%d, %v), wanted (1, nil)", n, err) - } - - // Expect that the waiter was woken. - if !w.woken() { - t.Error("waiter not woken") - } - }) - } -} - -func TestFutexWakeTwo(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(sizeofInt32) - - // Start three waiters waiting for wakeup. - var ws [3]*Waiter - for i := range ws { - ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i]) - } - - // Perform two wakeups. - const wakeups = 2 - if n, err := m.Wake(d, 0, private, ^uint32(0), 2); err != nil || n != wakeups { - t.Errorf("Wake: got (%d, %v), wanted (%d, nil)", n, err, wakeups) - } - - // Expect that exactly two waiters were woken. - // We don't get guarantees about exactly which two, - // (although we expect them to be w1 and w2). - awake := 0 - for i := range ws { - if ws[i].woken() { - awake++ - } - } - if awake != wakeups { - t.Errorf("got %d woken waiters, wanted %d", awake, wakeups) - } - }) - } -} - -func TestFutexWakeUnrelated(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(2 * sizeofInt32) - - // Start two waiters waiting for wakeup on different addresses. - w1 := newPreparedTestWaiter(t, m, d, 0*sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) - w2 := newPreparedTestWaiter(t, m, d, 1*sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) - - // Perform two wakeups on the second address. - if n, err := m.Wake(d, 1*sizeofInt32, private, ^uint32(0), 2); err != nil || n != 1 { - t.Errorf("Wake: got (%d, %v), wanted (1, nil)", n, err) - } - - // Expect that only the second waiter was woken. - if w1.woken() { - t.Error("w1 woken unexpectedly") - } - if !w2.woken() { - t.Error("w2 not woken") - } - }) - } -} - -func TestWakeOpEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(2 * sizeofInt32) - - // Perform wakeups with no waiters. - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 10, 0); err != nil || n != 0 { - t.Fatalf("WakeOp: got (%d, %v), wanted (0, nil)", n, err) - } - }) - } -} - -func TestWakeOpFirstNonEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address 0. - w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) - w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) - - // Perform 10 wakeups on address 0. - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 0, 0); err != nil || n != 2 { - t.Errorf("WakeOp: got (%d, %v), wanted (2, nil)", n, err) - } - - // Expect that both waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - }) - } -} - -func TestWakeOpSecondNonEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address sizeofInt32. - w1 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) - w2 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) - - // Perform 10 wakeups on address sizeofInt32 (contingent on - // d.Op(0), which should succeed). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 0, 10, 0); err != nil || n != 2 { - t.Errorf("WakeOp: got (%d, %v), wanted (2, nil)", n, err) - } - - // Expect that both waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - }) - } -} - -func TestWakeOpSecondNonEmptyFailingOp(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address sizeofInt32. - w1 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) - w2 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) - - // Perform 10 wakeups on address sizeofInt32 (contingent on - // d.Op(1), which should fail). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 0, 10, 1); err != nil || n != 0 { - t.Errorf("WakeOp: got (%d, %v), wanted (0, nil)", n, err) - } - - // Expect that neither waiter was woken. - if w1.woken() { - t.Error("w1 woken unexpectedly") - } - if w2.woken() { - t.Error("w2 woken unexpectedly") - } - }) - } -} - -func TestWakeOpAllNonEmpty(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address 0. - w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) - w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) - - // Add two waiters on address sizeofInt32. - w3 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w3) - w4 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w4) - - // Perform 10 wakeups on address 0 (unconditionally), and 10 - // wakeups on address sizeofInt32 (contingent on d.Op(0), which - // should succeed). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 10, 0); err != nil || n != 4 { - t.Errorf("WakeOp: got (%d, %v), wanted (4, nil)", n, err) - } - - // Expect that all waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - if !w3.woken() { - t.Error("w3 not woken") - } - if !w4.woken() { - t.Error("w4 not woken") - } - }) - } -} - -func TestWakeOpAllNonEmptyFailingOp(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add two waiters on address 0. - w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) - w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) - - // Add two waiters on address sizeofInt32. - w3 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w3) - w4 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w4) - - // Perform 10 wakeups on address 0 (unconditionally), and 10 - // wakeups on address sizeofInt32 (contingent on d.Op(1), which - // should fail). - if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 10, 1); err != nil || n != 2 { - t.Errorf("WakeOp: got (%d, %v), wanted (2, nil)", n, err) - } - - // Expect that only the first two waiters were woken. - if !w1.woken() { - t.Error("w1 not woken") - } - if !w2.woken() { - t.Error("w2 not woken") - } - if w3.woken() { - t.Error("w3 woken unexpectedly") - } - if w4.woken() { - t.Error("w4 woken unexpectedly") - } - }) - } -} - -func TestWakeOpSameAddress(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add four waiters on address 0. - var ws [4]*Waiter - for i := range ws { - ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i]) - } - - // Perform 1 wakeup on address 0 (unconditionally), and 1 wakeup - // on address 0 (contingent on d.Op(0), which should succeed). - const wakeups = 2 - if n, err := m.WakeOp(d, 0, 0, private, 1, 1, 0); err != nil || n != wakeups { - t.Errorf("WakeOp: got (%d, %v), wanted (%d, nil)", n, err, wakeups) - } - - // Expect that exactly two waiters were woken. - awake := 0 - for i := range ws { - if ws[i].woken() { - awake++ - } - } - if awake != wakeups { - t.Errorf("got %d woken waiters, wanted %d", awake, wakeups) - } - }) - } -} - -func TestWakeOpSameAddressFailingOp(t *testing.T) { - for _, private := range []bool{false, true} { - t.Run(futexKind(private), func(t *testing.T) { - m := NewManager() - d := newTestData(8) - - // Add four waiters on address 0. - var ws [4]*Waiter - for i := range ws { - ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i]) - } - - // Perform 1 wakeup on address 0 (unconditionally), and 1 wakeup - // on address 0 (contingent on d.Op(1), which should fail). - const wakeups = 1 - if n, err := m.WakeOp(d, 0, 0, private, 1, 1, 1); err != nil || n != wakeups { - t.Errorf("WakeOp: got (%d, %v), wanted (%d, nil)", n, err, wakeups) - } - - // Expect that exactly one waiter was woken. - awake := 0 - for i := range ws { - if ws[i].woken() { - awake++ - } - } - if awake != wakeups { - t.Errorf("got %d woken waiters, wanted %d", awake, wakeups) - } - }) - } -} - -const ( - testMutexSize = sizeofInt32 - testMutexLocked uint32 = 1 - testMutexUnlocked uint32 = 0 -) - -// testMutex ties together a testData slice, an address, and a -// futex manager in order to implement the sync.Locker interface. -// Beyond being used as a Locker, this is a simple mechanism for -// changing the underlying values for simpler tests. -type testMutex struct { - a usermem.Addr - d testData - m *Manager -} - -func newTestMutex(addr usermem.Addr, d testData, m *Manager) *testMutex { - return &testMutex{a: addr, d: d, m: m} -} - -// Lock acquires the testMutex. -// This may wait for it to be available via the futex manager. -func (t *testMutex) Lock() { - for { - // Attempt to grab the lock. - if atomic.CompareAndSwapUint32( - (*uint32)(unsafe.Pointer(&t.d[t.a])), - testMutexUnlocked, - testMutexLocked) { - // Lock held. - return - } - - // Wait for it to be "not locked". - w := NewWaiter() - err := t.m.WaitPrepare(w, t.d, t.a, true, testMutexLocked, ^uint32(0)) - if err == syscall.EAGAIN { - continue - } - if err != nil { - // Should never happen. - panic("WaitPrepare returned unexpected error: " + err.Error()) - } - <-w.C - t.m.WaitComplete(w) - } -} - -// Unlock releases the testMutex. -// This will notify any waiters via the futex manager. -func (t *testMutex) Unlock() { - // Unlock. - atomic.StoreUint32((*uint32)(unsafe.Pointer(&t.d[t.a])), testMutexUnlocked) - - // Notify all waiters. - t.m.Wake(t.d, t.a, true, ^uint32(0), math.MaxInt32) -} - -// This function was shamelessly stolen from mutex_test.go. -func HammerMutex(l sync.Locker, loops int, cdone chan bool) { - for i := 0; i < loops; i++ { - l.Lock() - runtime.Gosched() - l.Unlock() - } - cdone <- true -} - -func TestMutexStress(t *testing.T) { - m := NewManager() - d := newTestData(testMutexSize) - tm := newTestMutex(0*testMutexSize, d, m) - c := make(chan bool) - - for i := 0; i < 10; i++ { - go HammerMutex(tm, 1000, c) - } - - for i := 0; i < 10; i++ { - <-c - } -} diff --git a/pkg/sentry/kernel/futex/waiter_list.go b/pkg/sentry/kernel/futex/waiter_list.go new file mode 100755 index 000000000..cca5c4721 --- /dev/null +++ b/pkg/sentry/kernel/futex/waiter_list.go @@ -0,0 +1,173 @@ +package futex + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type waiterElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (waiterElementMapper) linkerFor(elem *Waiter) *Waiter { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type waiterList struct { + head *Waiter + tail *Waiter +} + +// Reset resets list l to the empty state. +func (l *waiterList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *waiterList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *waiterList) Front() *Waiter { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *waiterList) Back() *Waiter { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *waiterList) PushFront(e *Waiter) { + waiterElementMapper{}.linkerFor(e).SetNext(l.head) + waiterElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + waiterElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *waiterList) PushBack(e *Waiter) { + waiterElementMapper{}.linkerFor(e).SetNext(nil) + waiterElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *waiterList) PushBackList(m *waiterList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(m.head) + waiterElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *waiterList) InsertAfter(b, e *Waiter) { + a := waiterElementMapper{}.linkerFor(b).Next() + waiterElementMapper{}.linkerFor(e).SetNext(a) + waiterElementMapper{}.linkerFor(e).SetPrev(b) + waiterElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + waiterElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *waiterList) InsertBefore(a, e *Waiter) { + b := waiterElementMapper{}.linkerFor(a).Prev() + waiterElementMapper{}.linkerFor(e).SetNext(a) + waiterElementMapper{}.linkerFor(e).SetPrev(b) + waiterElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + waiterElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *waiterList) Remove(e *Waiter) { + prev := waiterElementMapper{}.linkerFor(e).Prev() + next := waiterElementMapper{}.linkerFor(e).Next() + + if prev != nil { + waiterElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + waiterElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type waiterEntry struct { + next *Waiter + prev *Waiter +} + +// Next returns the entry that follows e in the list. +func (e *waiterEntry) Next() *Waiter { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *waiterEntry) Prev() *Waiter { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *waiterEntry) SetNext(elem *Waiter) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *waiterEntry) SetPrev(elem *Waiter) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/g3doc/run_states.dot b/pkg/sentry/kernel/g3doc/run_states.dot deleted file mode 100644 index 7861fe1f5..000000000 --- a/pkg/sentry/kernel/g3doc/run_states.dot +++ /dev/null @@ -1,99 +0,0 @@ -digraph { - subgraph { - App; - } - subgraph { - Interrupt; - InterruptAfterSignalDeliveryStop; - } - subgraph { - Syscall; - SyscallAfterPtraceEventSeccomp; - SyscallEnter; - SyscallAfterSyscallEnterStop; - SyscallAfterSysemuStop; - SyscallInvoke; - SyscallAfterPtraceEventClone; - SyscallAfterExecStop; - SyscallAfterVforkStop; - SyscallReinvoke; - SyscallExit; - } - subgraph { - Vsyscall; - VsyscallAfterPtraceEventSeccomp; - VsyscallInvoke; - } - subgraph { - Exit; - ExitMain; // leave thread group, release resources, reparent children, kill PID namespace and wait if TGID 1 - ExitNotify; // signal parent/tracer, become waitable - ExitDone; // represented by t.runState == nil - } - - // Task exit - Exit -> ExitMain; - ExitMain -> ExitNotify; - ExitNotify -> ExitDone; - - // Execution of untrusted application code - App -> App; - - // Interrupts (usually signal delivery) - App -> Interrupt; - Interrupt -> Interrupt; // if other interrupt conditions may still apply - Interrupt -> Exit; // if killed - - // Syscalls - App -> Syscall; - Syscall -> SyscallEnter; - SyscallEnter -> SyscallInvoke; - SyscallInvoke -> SyscallExit; - SyscallExit -> App; - - // exit, exit_group - SyscallInvoke -> Exit; - - // execve - SyscallInvoke -> SyscallAfterExecStop; - SyscallAfterExecStop -> SyscallExit; - SyscallAfterExecStop -> App; // fatal signal pending - - // vfork - SyscallInvoke -> SyscallAfterVforkStop; - SyscallAfterVforkStop -> SyscallExit; - - // Vsyscalls - App -> Vsyscall; - Vsyscall -> VsyscallInvoke; - Vsyscall -> App; // fault while reading return address from stack - VsyscallInvoke -> App; - - // ptrace-specific branches - Interrupt -> InterruptAfterSignalDeliveryStop; - InterruptAfterSignalDeliveryStop -> Interrupt; - SyscallEnter -> SyscallAfterSyscallEnterStop; - SyscallAfterSyscallEnterStop -> SyscallInvoke; - SyscallAfterSyscallEnterStop -> SyscallExit; // skipped by tracer - SyscallAfterSyscallEnterStop -> App; // fatal signal pending - SyscallEnter -> SyscallAfterSysemuStop; - SyscallAfterSysemuStop -> SyscallExit; - SyscallAfterSysemuStop -> App; // fatal signal pending - SyscallInvoke -> SyscallAfterPtraceEventClone; - SyscallAfterPtraceEventClone -> SyscallExit; - SyscallAfterPtraceEventClone -> SyscallAfterVforkStop; - - // seccomp - Syscall -> App; // SECCOMP_RET_TRAP, SECCOMP_RET_ERRNO, SECCOMP_RET_KILL, SECCOMP_RET_TRACE without tracer - Syscall -> SyscallAfterPtraceEventSeccomp; // SECCOMP_RET_TRACE - SyscallAfterPtraceEventSeccomp -> SyscallEnter; - SyscallAfterPtraceEventSeccomp -> SyscallExit; // skipped by tracer - SyscallAfterPtraceEventSeccomp -> App; // fatal signal pending - Vsyscall -> VsyscallAfterPtraceEventSeccomp; - VsyscallAfterPtraceEventSeccomp -> VsyscallInvoke; - VsyscallAfterPtraceEventSeccomp -> App; - - // Autosave - SyscallInvoke -> SyscallReinvoke; - SyscallReinvoke -> SyscallInvoke; -} diff --git a/pkg/sentry/kernel/g3doc/run_states.png b/pkg/sentry/kernel/g3doc/run_states.png Binary files differdeleted file mode 100644 index b63b60f02..000000000 --- a/pkg/sentry/kernel/g3doc/run_states.png +++ /dev/null diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go new file mode 100755 index 000000000..ae3decf57 --- /dev/null +++ b/pkg/sentry/kernel/kernel_state_autogen.go @@ -0,0 +1,1198 @@ +// automatically generated by stateify. + +package kernel + +import ( + "gvisor.dev/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/tcpip" +) + +func (x *abstractEndpoint) beforeSave() {} +func (x *abstractEndpoint) save(m state.Map) { + x.beforeSave() + m.Save("ep", &x.ep) + m.Save("wr", &x.wr) + m.Save("name", &x.name) + m.Save("ns", &x.ns) +} + +func (x *abstractEndpoint) afterLoad() {} +func (x *abstractEndpoint) load(m state.Map) { + m.Load("ep", &x.ep) + m.Load("wr", &x.wr) + m.Load("name", &x.name) + m.Load("ns", &x.ns) +} + +func (x *AbstractSocketNamespace) beforeSave() {} +func (x *AbstractSocketNamespace) save(m state.Map) { + x.beforeSave() + m.Save("endpoints", &x.endpoints) +} + +func (x *AbstractSocketNamespace) afterLoad() {} +func (x *AbstractSocketNamespace) load(m state.Map) { + m.Load("endpoints", &x.endpoints) +} + +func (x *FDFlags) beforeSave() {} +func (x *FDFlags) save(m state.Map) { + x.beforeSave() + m.Save("CloseOnExec", &x.CloseOnExec) +} + +func (x *FDFlags) afterLoad() {} +func (x *FDFlags) load(m state.Map) { + m.Load("CloseOnExec", &x.CloseOnExec) +} + +func (x *descriptor) beforeSave() {} +func (x *descriptor) save(m state.Map) { + x.beforeSave() + m.Save("file", &x.file) + m.Save("flags", &x.flags) +} + +func (x *descriptor) afterLoad() {} +func (x *descriptor) load(m state.Map) { + m.Load("file", &x.file) + m.Load("flags", &x.flags) +} + +func (x *FDTable) beforeSave() {} +func (x *FDTable) save(m state.Map) { + x.beforeSave() + var descriptorTable map[int32]descriptor = x.saveDescriptorTable() + m.SaveValue("descriptorTable", descriptorTable) + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("k", &x.k) + m.Save("uid", &x.uid) + m.Save("used", &x.used) +} + +func (x *FDTable) afterLoad() {} +func (x *FDTable) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("k", &x.k) + m.Load("uid", &x.uid) + m.Load("used", &x.used) + m.LoadValue("descriptorTable", new(map[int32]descriptor), func(y interface{}) { x.loadDescriptorTable(y.(map[int32]descriptor)) }) +} + +func (x *FSContext) beforeSave() {} +func (x *FSContext) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("root", &x.root) + m.Save("cwd", &x.cwd) + m.Save("umask", &x.umask) +} + +func (x *FSContext) afterLoad() {} +func (x *FSContext) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("root", &x.root) + m.Load("cwd", &x.cwd) + m.Load("umask", &x.umask) +} + +func (x *IPCNamespace) beforeSave() {} +func (x *IPCNamespace) save(m state.Map) { + x.beforeSave() + m.Save("userNS", &x.userNS) + m.Save("semaphores", &x.semaphores) + m.Save("shms", &x.shms) +} + +func (x *IPCNamespace) afterLoad() {} +func (x *IPCNamespace) load(m state.Map) { + m.Load("userNS", &x.userNS) + m.Load("semaphores", &x.semaphores) + m.Load("shms", &x.shms) +} + +func (x *Kernel) beforeSave() {} +func (x *Kernel) save(m state.Map) { + x.beforeSave() + var danglingEndpoints []tcpip.Endpoint = x.saveDanglingEndpoints() + m.SaveValue("danglingEndpoints", danglingEndpoints) + var deviceRegistry *device.Registry = x.saveDeviceRegistry() + m.SaveValue("deviceRegistry", deviceRegistry) + m.Save("featureSet", &x.featureSet) + m.Save("timekeeper", &x.timekeeper) + m.Save("tasks", &x.tasks) + m.Save("rootUserNamespace", &x.rootUserNamespace) + m.Save("applicationCores", &x.applicationCores) + m.Save("useHostCores", &x.useHostCores) + m.Save("extraAuxv", &x.extraAuxv) + m.Save("vdso", &x.vdso) + m.Save("rootUTSNamespace", &x.rootUTSNamespace) + m.Save("rootIPCNamespace", &x.rootIPCNamespace) + m.Save("rootAbstractSocketNamespace", &x.rootAbstractSocketNamespace) + m.Save("futexes", &x.futexes) + m.Save("globalInit", &x.globalInit) + m.Save("realtimeClock", &x.realtimeClock) + m.Save("monotonicClock", &x.monotonicClock) + m.Save("syslog", &x.syslog) + m.Save("cpuClock", &x.cpuClock) + m.Save("fdMapUids", &x.fdMapUids) + m.Save("uniqueID", &x.uniqueID) + m.Save("nextInotifyCookie", &x.nextInotifyCookie) + m.Save("netlinkPorts", &x.netlinkPorts) + m.Save("sockets", &x.sockets) + m.Save("nextSocketEntry", &x.nextSocketEntry) + m.Save("DirentCacheLimiter", &x.DirentCacheLimiter) +} + +func (x *Kernel) afterLoad() {} +func (x *Kernel) load(m state.Map) { + m.Load("featureSet", &x.featureSet) + m.Load("timekeeper", &x.timekeeper) + m.Load("tasks", &x.tasks) + m.Load("rootUserNamespace", &x.rootUserNamespace) + m.Load("applicationCores", &x.applicationCores) + m.Load("useHostCores", &x.useHostCores) + m.Load("extraAuxv", &x.extraAuxv) + m.Load("vdso", &x.vdso) + m.Load("rootUTSNamespace", &x.rootUTSNamespace) + m.Load("rootIPCNamespace", &x.rootIPCNamespace) + m.Load("rootAbstractSocketNamespace", &x.rootAbstractSocketNamespace) + m.Load("futexes", &x.futexes) + m.Load("globalInit", &x.globalInit) + m.Load("realtimeClock", &x.realtimeClock) + m.Load("monotonicClock", &x.monotonicClock) + m.Load("syslog", &x.syslog) + m.Load("cpuClock", &x.cpuClock) + m.Load("fdMapUids", &x.fdMapUids) + m.Load("uniqueID", &x.uniqueID) + m.Load("nextInotifyCookie", &x.nextInotifyCookie) + m.Load("netlinkPorts", &x.netlinkPorts) + m.Load("sockets", &x.sockets) + m.Load("nextSocketEntry", &x.nextSocketEntry) + m.Load("DirentCacheLimiter", &x.DirentCacheLimiter) + m.LoadValue("danglingEndpoints", new([]tcpip.Endpoint), func(y interface{}) { x.loadDanglingEndpoints(y.([]tcpip.Endpoint)) }) + m.LoadValue("deviceRegistry", new(*device.Registry), func(y interface{}) { x.loadDeviceRegistry(y.(*device.Registry)) }) +} + +func (x *SocketEntry) beforeSave() {} +func (x *SocketEntry) save(m state.Map) { + x.beforeSave() + m.Save("socketEntry", &x.socketEntry) + m.Save("k", &x.k) + m.Save("Sock", &x.Sock) + m.Save("ID", &x.ID) +} + +func (x *SocketEntry) afterLoad() {} +func (x *SocketEntry) load(m state.Map) { + m.Load("socketEntry", &x.socketEntry) + m.Load("k", &x.k) + m.Load("Sock", &x.Sock) + m.Load("ID", &x.ID) +} + +func (x *pendingSignals) beforeSave() {} +func (x *pendingSignals) save(m state.Map) { + x.beforeSave() + var signals []savedPendingSignal = x.saveSignals() + m.SaveValue("signals", signals) +} + +func (x *pendingSignals) afterLoad() {} +func (x *pendingSignals) load(m state.Map) { + m.LoadValue("signals", new([]savedPendingSignal), func(y interface{}) { x.loadSignals(y.([]savedPendingSignal)) }) +} + +func (x *pendingSignalQueue) beforeSave() {} +func (x *pendingSignalQueue) save(m state.Map) { + x.beforeSave() + m.Save("pendingSignalList", &x.pendingSignalList) + m.Save("length", &x.length) +} + +func (x *pendingSignalQueue) afterLoad() {} +func (x *pendingSignalQueue) load(m state.Map) { + m.Load("pendingSignalList", &x.pendingSignalList) + m.Load("length", &x.length) +} + +func (x *pendingSignal) beforeSave() {} +func (x *pendingSignal) save(m state.Map) { + x.beforeSave() + m.Save("pendingSignalEntry", &x.pendingSignalEntry) + m.Save("SignalInfo", &x.SignalInfo) + m.Save("timer", &x.timer) +} + +func (x *pendingSignal) afterLoad() {} +func (x *pendingSignal) load(m state.Map) { + m.Load("pendingSignalEntry", &x.pendingSignalEntry) + m.Load("SignalInfo", &x.SignalInfo) + m.Load("timer", &x.timer) +} + +func (x *pendingSignalList) beforeSave() {} +func (x *pendingSignalList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *pendingSignalList) afterLoad() {} +func (x *pendingSignalList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *pendingSignalEntry) beforeSave() {} +func (x *pendingSignalEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *pendingSignalEntry) afterLoad() {} +func (x *pendingSignalEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func (x *savedPendingSignal) beforeSave() {} +func (x *savedPendingSignal) save(m state.Map) { + x.beforeSave() + m.Save("si", &x.si) + m.Save("timer", &x.timer) +} + +func (x *savedPendingSignal) afterLoad() {} +func (x *savedPendingSignal) load(m state.Map) { + m.Load("si", &x.si) + m.Load("timer", &x.timer) +} + +func (x *IntervalTimer) beforeSave() {} +func (x *IntervalTimer) save(m state.Map) { + x.beforeSave() + m.Save("timer", &x.timer) + m.Save("target", &x.target) + m.Save("signo", &x.signo) + m.Save("id", &x.id) + m.Save("sigval", &x.sigval) + m.Save("group", &x.group) + m.Save("sigpending", &x.sigpending) + m.Save("sigorphan", &x.sigorphan) + m.Save("overrunCur", &x.overrunCur) + m.Save("overrunLast", &x.overrunLast) +} + +func (x *IntervalTimer) afterLoad() {} +func (x *IntervalTimer) load(m state.Map) { + m.Load("timer", &x.timer) + m.Load("target", &x.target) + m.Load("signo", &x.signo) + m.Load("id", &x.id) + m.Load("sigval", &x.sigval) + m.Load("group", &x.group) + m.Load("sigpending", &x.sigpending) + m.Load("sigorphan", &x.sigorphan) + m.Load("overrunCur", &x.overrunCur) + m.Load("overrunLast", &x.overrunLast) +} + +func (x *processGroupList) beforeSave() {} +func (x *processGroupList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *processGroupList) afterLoad() {} +func (x *processGroupList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *processGroupEntry) beforeSave() {} +func (x *processGroupEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *processGroupEntry) afterLoad() {} +func (x *processGroupEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func (x *ptraceOptions) beforeSave() {} +func (x *ptraceOptions) save(m state.Map) { + x.beforeSave() + m.Save("ExitKill", &x.ExitKill) + m.Save("SysGood", &x.SysGood) + m.Save("TraceClone", &x.TraceClone) + m.Save("TraceExec", &x.TraceExec) + m.Save("TraceExit", &x.TraceExit) + m.Save("TraceFork", &x.TraceFork) + m.Save("TraceSeccomp", &x.TraceSeccomp) + m.Save("TraceVfork", &x.TraceVfork) + m.Save("TraceVforkDone", &x.TraceVforkDone) +} + +func (x *ptraceOptions) afterLoad() {} +func (x *ptraceOptions) load(m state.Map) { + m.Load("ExitKill", &x.ExitKill) + m.Load("SysGood", &x.SysGood) + m.Load("TraceClone", &x.TraceClone) + m.Load("TraceExec", &x.TraceExec) + m.Load("TraceExit", &x.TraceExit) + m.Load("TraceFork", &x.TraceFork) + m.Load("TraceSeccomp", &x.TraceSeccomp) + m.Load("TraceVfork", &x.TraceVfork) + m.Load("TraceVforkDone", &x.TraceVforkDone) +} + +func (x *ptraceStop) beforeSave() {} +func (x *ptraceStop) save(m state.Map) { + x.beforeSave() + m.Save("frozen", &x.frozen) + m.Save("listen", &x.listen) +} + +func (x *ptraceStop) afterLoad() {} +func (x *ptraceStop) load(m state.Map) { + m.Load("frozen", &x.frozen) + m.Load("listen", &x.listen) +} + +func (x *RSEQCriticalRegion) beforeSave() {} +func (x *RSEQCriticalRegion) save(m state.Map) { + x.beforeSave() + m.Save("CriticalSection", &x.CriticalSection) + m.Save("Restart", &x.Restart) +} + +func (x *RSEQCriticalRegion) afterLoad() {} +func (x *RSEQCriticalRegion) load(m state.Map) { + m.Load("CriticalSection", &x.CriticalSection) + m.Load("Restart", &x.Restart) +} + +func (x *sessionList) beforeSave() {} +func (x *sessionList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *sessionList) afterLoad() {} +func (x *sessionList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *sessionEntry) beforeSave() {} +func (x *sessionEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *sessionEntry) afterLoad() {} +func (x *sessionEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func (x *Session) beforeSave() {} +func (x *Session) save(m state.Map) { + x.beforeSave() + m.Save("refs", &x.refs) + m.Save("leader", &x.leader) + m.Save("id", &x.id) + m.Save("foreground", &x.foreground) + m.Save("processGroups", &x.processGroups) + m.Save("sessionEntry", &x.sessionEntry) +} + +func (x *Session) afterLoad() {} +func (x *Session) load(m state.Map) { + m.Load("refs", &x.refs) + m.Load("leader", &x.leader) + m.Load("id", &x.id) + m.Load("foreground", &x.foreground) + m.Load("processGroups", &x.processGroups) + m.Load("sessionEntry", &x.sessionEntry) +} + +func (x *ProcessGroup) beforeSave() {} +func (x *ProcessGroup) save(m state.Map) { + x.beforeSave() + m.Save("refs", &x.refs) + m.Save("originator", &x.originator) + m.Save("id", &x.id) + m.Save("session", &x.session) + m.Save("ancestors", &x.ancestors) + m.Save("processGroupEntry", &x.processGroupEntry) +} + +func (x *ProcessGroup) afterLoad() {} +func (x *ProcessGroup) load(m state.Map) { + m.Load("refs", &x.refs) + m.Load("originator", &x.originator) + m.Load("id", &x.id) + m.Load("session", &x.session) + m.Load("ancestors", &x.ancestors) + m.Load("processGroupEntry", &x.processGroupEntry) +} + +func (x *SignalHandlers) beforeSave() {} +func (x *SignalHandlers) save(m state.Map) { + x.beforeSave() + m.Save("actions", &x.actions) +} + +func (x *SignalHandlers) afterLoad() {} +func (x *SignalHandlers) load(m state.Map) { + m.Load("actions", &x.actions) +} + +func (x *socketList) beforeSave() {} +func (x *socketList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *socketList) afterLoad() {} +func (x *socketList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *socketEntry) beforeSave() {} +func (x *socketEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *socketEntry) afterLoad() {} +func (x *socketEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func (x *SyscallTable) beforeSave() {} +func (x *SyscallTable) save(m state.Map) { + x.beforeSave() + m.Save("OS", &x.OS) + m.Save("Arch", &x.Arch) +} + +func (x *SyscallTable) load(m state.Map) { + m.LoadWait("OS", &x.OS) + m.LoadWait("Arch", &x.Arch) + m.AfterLoad(x.afterLoad) +} + +func (x *syslog) beforeSave() {} +func (x *syslog) save(m state.Map) { + x.beforeSave() + m.Save("msg", &x.msg) +} + +func (x *syslog) afterLoad() {} +func (x *syslog) load(m state.Map) { + m.Load("msg", &x.msg) +} + +func (x *Task) beforeSave() {} +func (x *Task) save(m state.Map) { + x.beforeSave() + var ptraceTracer *Task = x.savePtraceTracer() + m.SaveValue("ptraceTracer", ptraceTracer) + var logPrefix string = x.saveLogPrefix() + m.SaveValue("logPrefix", logPrefix) + var syscallFilters []bpf.Program = x.saveSyscallFilters() + m.SaveValue("syscallFilters", syscallFilters) + m.Save("taskNode", &x.taskNode) + m.Save("runState", &x.runState) + m.Save("haveSyscallReturn", &x.haveSyscallReturn) + m.Save("gosched", &x.gosched) + m.Save("yieldCount", &x.yieldCount) + m.Save("pendingSignals", &x.pendingSignals) + m.Save("signalMask", &x.signalMask) + m.Save("realSignalMask", &x.realSignalMask) + m.Save("haveSavedSignalMask", &x.haveSavedSignalMask) + m.Save("savedSignalMask", &x.savedSignalMask) + m.Save("signalStack", &x.signalStack) + m.Save("groupStopPending", &x.groupStopPending) + m.Save("groupStopAcknowledged", &x.groupStopAcknowledged) + m.Save("trapStopPending", &x.trapStopPending) + m.Save("trapNotifyPending", &x.trapNotifyPending) + m.Save("stop", &x.stop) + m.Save("exitStatus", &x.exitStatus) + m.Save("syscallRestartBlock", &x.syscallRestartBlock) + m.Save("k", &x.k) + m.Save("containerID", &x.containerID) + m.Save("tc", &x.tc) + m.Save("fsContext", &x.fsContext) + m.Save("fdTable", &x.fdTable) + m.Save("vforkParent", &x.vforkParent) + m.Save("exitState", &x.exitState) + m.Save("exitTracerNotified", &x.exitTracerNotified) + m.Save("exitTracerAcked", &x.exitTracerAcked) + m.Save("exitParentNotified", &x.exitParentNotified) + m.Save("exitParentAcked", &x.exitParentAcked) + m.Save("ptraceTracees", &x.ptraceTracees) + m.Save("ptraceSeized", &x.ptraceSeized) + m.Save("ptraceOpts", &x.ptraceOpts) + m.Save("ptraceSyscallMode", &x.ptraceSyscallMode) + m.Save("ptraceSinglestep", &x.ptraceSinglestep) + m.Save("ptraceCode", &x.ptraceCode) + m.Save("ptraceSiginfo", &x.ptraceSiginfo) + m.Save("ptraceEventMsg", &x.ptraceEventMsg) + m.Save("ioUsage", &x.ioUsage) + m.Save("creds", &x.creds) + m.Save("utsns", &x.utsns) + m.Save("ipcns", &x.ipcns) + m.Save("abstractSockets", &x.abstractSockets) + m.Save("parentDeathSignal", &x.parentDeathSignal) + m.Save("cleartid", &x.cleartid) + m.Save("allowedCPUMask", &x.allowedCPUMask) + m.Save("cpu", &x.cpu) + m.Save("niceness", &x.niceness) + m.Save("numaPolicy", &x.numaPolicy) + m.Save("numaNodeMask", &x.numaNodeMask) + m.Save("netns", &x.netns) + m.Save("rseqCPUAddr", &x.rseqCPUAddr) + m.Save("rseqCPU", &x.rseqCPU) + m.Save("startTime", &x.startTime) +} + +func (x *Task) load(m state.Map) { + m.Load("taskNode", &x.taskNode) + m.Load("runState", &x.runState) + m.Load("haveSyscallReturn", &x.haveSyscallReturn) + m.Load("gosched", &x.gosched) + m.Load("yieldCount", &x.yieldCount) + m.Load("pendingSignals", &x.pendingSignals) + m.Load("signalMask", &x.signalMask) + m.Load("realSignalMask", &x.realSignalMask) + m.Load("haveSavedSignalMask", &x.haveSavedSignalMask) + m.Load("savedSignalMask", &x.savedSignalMask) + m.Load("signalStack", &x.signalStack) + m.Load("groupStopPending", &x.groupStopPending) + m.Load("groupStopAcknowledged", &x.groupStopAcknowledged) + m.Load("trapStopPending", &x.trapStopPending) + m.Load("trapNotifyPending", &x.trapNotifyPending) + m.Load("stop", &x.stop) + m.Load("exitStatus", &x.exitStatus) + m.Load("syscallRestartBlock", &x.syscallRestartBlock) + m.Load("k", &x.k) + m.Load("containerID", &x.containerID) + m.Load("tc", &x.tc) + m.Load("fsContext", &x.fsContext) + m.Load("fdTable", &x.fdTable) + m.Load("vforkParent", &x.vforkParent) + m.Load("exitState", &x.exitState) + m.Load("exitTracerNotified", &x.exitTracerNotified) + m.Load("exitTracerAcked", &x.exitTracerAcked) + m.Load("exitParentNotified", &x.exitParentNotified) + m.Load("exitParentAcked", &x.exitParentAcked) + m.Load("ptraceTracees", &x.ptraceTracees) + m.Load("ptraceSeized", &x.ptraceSeized) + m.Load("ptraceOpts", &x.ptraceOpts) + m.Load("ptraceSyscallMode", &x.ptraceSyscallMode) + m.Load("ptraceSinglestep", &x.ptraceSinglestep) + m.Load("ptraceCode", &x.ptraceCode) + m.Load("ptraceSiginfo", &x.ptraceSiginfo) + m.Load("ptraceEventMsg", &x.ptraceEventMsg) + m.Load("ioUsage", &x.ioUsage) + m.Load("creds", &x.creds) + m.Load("utsns", &x.utsns) + m.Load("ipcns", &x.ipcns) + m.Load("abstractSockets", &x.abstractSockets) + m.Load("parentDeathSignal", &x.parentDeathSignal) + m.Load("cleartid", &x.cleartid) + m.Load("allowedCPUMask", &x.allowedCPUMask) + m.Load("cpu", &x.cpu) + m.Load("niceness", &x.niceness) + m.Load("numaPolicy", &x.numaPolicy) + m.Load("numaNodeMask", &x.numaNodeMask) + m.Load("netns", &x.netns) + m.Load("rseqCPUAddr", &x.rseqCPUAddr) + m.Load("rseqCPU", &x.rseqCPU) + m.Load("startTime", &x.startTime) + m.LoadValue("ptraceTracer", new(*Task), func(y interface{}) { x.loadPtraceTracer(y.(*Task)) }) + m.LoadValue("logPrefix", new(string), func(y interface{}) { x.loadLogPrefix(y.(string)) }) + m.LoadValue("syscallFilters", new([]bpf.Program), func(y interface{}) { x.loadSyscallFilters(y.([]bpf.Program)) }) + m.AfterLoad(x.afterLoad) +} + +func (x *runSyscallAfterPtraceEventClone) beforeSave() {} +func (x *runSyscallAfterPtraceEventClone) save(m state.Map) { + x.beforeSave() + m.Save("vforkChild", &x.vforkChild) + m.Save("vforkChildTID", &x.vforkChildTID) +} + +func (x *runSyscallAfterPtraceEventClone) afterLoad() {} +func (x *runSyscallAfterPtraceEventClone) load(m state.Map) { + m.Load("vforkChild", &x.vforkChild) + m.Load("vforkChildTID", &x.vforkChildTID) +} + +func (x *runSyscallAfterVforkStop) beforeSave() {} +func (x *runSyscallAfterVforkStop) save(m state.Map) { + x.beforeSave() + m.Save("childTID", &x.childTID) +} + +func (x *runSyscallAfterVforkStop) afterLoad() {} +func (x *runSyscallAfterVforkStop) load(m state.Map) { + m.Load("childTID", &x.childTID) +} + +func (x *vforkStop) beforeSave() {} +func (x *vforkStop) save(m state.Map) { + x.beforeSave() +} + +func (x *vforkStop) afterLoad() {} +func (x *vforkStop) load(m state.Map) { +} + +func (x *TaskContext) beforeSave() {} +func (x *TaskContext) save(m state.Map) { + x.beforeSave() + m.Save("Name", &x.Name) + m.Save("Arch", &x.Arch) + m.Save("MemoryManager", &x.MemoryManager) + m.Save("fu", &x.fu) + m.Save("st", &x.st) +} + +func (x *TaskContext) afterLoad() {} +func (x *TaskContext) load(m state.Map) { + m.Load("Name", &x.Name) + m.Load("Arch", &x.Arch) + m.Load("MemoryManager", &x.MemoryManager) + m.Load("fu", &x.fu) + m.Load("st", &x.st) +} + +func (x *execStop) beforeSave() {} +func (x *execStop) save(m state.Map) { + x.beforeSave() +} + +func (x *execStop) afterLoad() {} +func (x *execStop) load(m state.Map) { +} + +func (x *runSyscallAfterExecStop) beforeSave() {} +func (x *runSyscallAfterExecStop) save(m state.Map) { + x.beforeSave() + m.Save("tc", &x.tc) +} + +func (x *runSyscallAfterExecStop) afterLoad() {} +func (x *runSyscallAfterExecStop) load(m state.Map) { + m.Load("tc", &x.tc) +} + +func (x *ExitStatus) beforeSave() {} +func (x *ExitStatus) save(m state.Map) { + x.beforeSave() + m.Save("Code", &x.Code) + m.Save("Signo", &x.Signo) +} + +func (x *ExitStatus) afterLoad() {} +func (x *ExitStatus) load(m state.Map) { + m.Load("Code", &x.Code) + m.Load("Signo", &x.Signo) +} + +func (x *runExit) beforeSave() {} +func (x *runExit) save(m state.Map) { + x.beforeSave() +} + +func (x *runExit) afterLoad() {} +func (x *runExit) load(m state.Map) { +} + +func (x *runExitMain) beforeSave() {} +func (x *runExitMain) save(m state.Map) { + x.beforeSave() +} + +func (x *runExitMain) afterLoad() {} +func (x *runExitMain) load(m state.Map) { +} + +func (x *runExitNotify) beforeSave() {} +func (x *runExitNotify) save(m state.Map) { + x.beforeSave() +} + +func (x *runExitNotify) afterLoad() {} +func (x *runExitNotify) load(m state.Map) { +} + +func (x *taskList) beforeSave() {} +func (x *taskList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *taskList) afterLoad() {} +func (x *taskList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *taskEntry) beforeSave() {} +func (x *taskEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *taskEntry) afterLoad() {} +func (x *taskEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func (x *runApp) beforeSave() {} +func (x *runApp) save(m state.Map) { + x.beforeSave() +} + +func (x *runApp) afterLoad() {} +func (x *runApp) load(m state.Map) { +} + +func (x *TaskGoroutineSchedInfo) beforeSave() {} +func (x *TaskGoroutineSchedInfo) save(m state.Map) { + x.beforeSave() + m.Save("Timestamp", &x.Timestamp) + m.Save("State", &x.State) + m.Save("UserTicks", &x.UserTicks) + m.Save("SysTicks", &x.SysTicks) +} + +func (x *TaskGoroutineSchedInfo) afterLoad() {} +func (x *TaskGoroutineSchedInfo) load(m state.Map) { + m.Load("Timestamp", &x.Timestamp) + m.Load("State", &x.State) + m.Load("UserTicks", &x.UserTicks) + m.Load("SysTicks", &x.SysTicks) +} + +func (x *taskClock) beforeSave() {} +func (x *taskClock) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) + m.Save("includeSys", &x.includeSys) +} + +func (x *taskClock) afterLoad() {} +func (x *taskClock) load(m state.Map) { + m.Load("t", &x.t) + m.Load("includeSys", &x.includeSys) +} + +func (x *tgClock) beforeSave() {} +func (x *tgClock) save(m state.Map) { + x.beforeSave() + m.Save("tg", &x.tg) + m.Save("includeSys", &x.includeSys) +} + +func (x *tgClock) afterLoad() {} +func (x *tgClock) load(m state.Map) { + m.Load("tg", &x.tg) + m.Load("includeSys", &x.includeSys) +} + +func (x *groupStop) beforeSave() {} +func (x *groupStop) save(m state.Map) { + x.beforeSave() +} + +func (x *groupStop) afterLoad() {} +func (x *groupStop) load(m state.Map) { +} + +func (x *runInterrupt) beforeSave() {} +func (x *runInterrupt) save(m state.Map) { + x.beforeSave() +} + +func (x *runInterrupt) afterLoad() {} +func (x *runInterrupt) load(m state.Map) { +} + +func (x *runInterruptAfterSignalDeliveryStop) beforeSave() {} +func (x *runInterruptAfterSignalDeliveryStop) save(m state.Map) { + x.beforeSave() +} + +func (x *runInterruptAfterSignalDeliveryStop) afterLoad() {} +func (x *runInterruptAfterSignalDeliveryStop) load(m state.Map) { +} + +func (x *runSyscallAfterSyscallEnterStop) beforeSave() {} +func (x *runSyscallAfterSyscallEnterStop) save(m state.Map) { + x.beforeSave() +} + +func (x *runSyscallAfterSyscallEnterStop) afterLoad() {} +func (x *runSyscallAfterSyscallEnterStop) load(m state.Map) { +} + +func (x *runSyscallAfterSysemuStop) beforeSave() {} +func (x *runSyscallAfterSysemuStop) save(m state.Map) { + x.beforeSave() +} + +func (x *runSyscallAfterSysemuStop) afterLoad() {} +func (x *runSyscallAfterSysemuStop) load(m state.Map) { +} + +func (x *runSyscallReinvoke) beforeSave() {} +func (x *runSyscallReinvoke) save(m state.Map) { + x.beforeSave() +} + +func (x *runSyscallReinvoke) afterLoad() {} +func (x *runSyscallReinvoke) load(m state.Map) { +} + +func (x *runSyscallExit) beforeSave() {} +func (x *runSyscallExit) save(m state.Map) { + x.beforeSave() +} + +func (x *runSyscallExit) afterLoad() {} +func (x *runSyscallExit) load(m state.Map) { +} + +func (x *ThreadGroup) beforeSave() {} +func (x *ThreadGroup) save(m state.Map) { + x.beforeSave() + var rscr *RSEQCriticalRegion = x.saveRscr() + m.SaveValue("rscr", rscr) + m.Save("threadGroupNode", &x.threadGroupNode) + m.Save("signalHandlers", &x.signalHandlers) + m.Save("pendingSignals", &x.pendingSignals) + m.Save("groupStopDequeued", &x.groupStopDequeued) + m.Save("groupStopSignal", &x.groupStopSignal) + m.Save("groupStopPendingCount", &x.groupStopPendingCount) + m.Save("groupStopComplete", &x.groupStopComplete) + m.Save("groupStopWaitable", &x.groupStopWaitable) + m.Save("groupContNotify", &x.groupContNotify) + m.Save("groupContInterrupted", &x.groupContInterrupted) + m.Save("groupContWaitable", &x.groupContWaitable) + m.Save("exiting", &x.exiting) + m.Save("exitStatus", &x.exitStatus) + m.Save("terminationSignal", &x.terminationSignal) + m.Save("itimerRealTimer", &x.itimerRealTimer) + m.Save("itimerVirtSetting", &x.itimerVirtSetting) + m.Save("itimerProfSetting", &x.itimerProfSetting) + m.Save("rlimitCPUSoftSetting", &x.rlimitCPUSoftSetting) + m.Save("cpuTimersEnabled", &x.cpuTimersEnabled) + m.Save("timers", &x.timers) + m.Save("nextTimerID", &x.nextTimerID) + m.Save("exitedCPUStats", &x.exitedCPUStats) + m.Save("childCPUStats", &x.childCPUStats) + m.Save("ioUsage", &x.ioUsage) + m.Save("maxRSS", &x.maxRSS) + m.Save("childMaxRSS", &x.childMaxRSS) + m.Save("limits", &x.limits) + m.Save("processGroup", &x.processGroup) + m.Save("execed", &x.execed) + m.Save("mounts", &x.mounts) + m.Save("tty", &x.tty) +} + +func (x *ThreadGroup) afterLoad() {} +func (x *ThreadGroup) load(m state.Map) { + m.Load("threadGroupNode", &x.threadGroupNode) + m.Load("signalHandlers", &x.signalHandlers) + m.Load("pendingSignals", &x.pendingSignals) + m.Load("groupStopDequeued", &x.groupStopDequeued) + m.Load("groupStopSignal", &x.groupStopSignal) + m.Load("groupStopPendingCount", &x.groupStopPendingCount) + m.Load("groupStopComplete", &x.groupStopComplete) + m.Load("groupStopWaitable", &x.groupStopWaitable) + m.Load("groupContNotify", &x.groupContNotify) + m.Load("groupContInterrupted", &x.groupContInterrupted) + m.Load("groupContWaitable", &x.groupContWaitable) + m.Load("exiting", &x.exiting) + m.Load("exitStatus", &x.exitStatus) + m.Load("terminationSignal", &x.terminationSignal) + m.Load("itimerRealTimer", &x.itimerRealTimer) + m.Load("itimerVirtSetting", &x.itimerVirtSetting) + m.Load("itimerProfSetting", &x.itimerProfSetting) + m.Load("rlimitCPUSoftSetting", &x.rlimitCPUSoftSetting) + m.Load("cpuTimersEnabled", &x.cpuTimersEnabled) + m.Load("timers", &x.timers) + m.Load("nextTimerID", &x.nextTimerID) + m.Load("exitedCPUStats", &x.exitedCPUStats) + m.Load("childCPUStats", &x.childCPUStats) + m.Load("ioUsage", &x.ioUsage) + m.Load("maxRSS", &x.maxRSS) + m.Load("childMaxRSS", &x.childMaxRSS) + m.Load("limits", &x.limits) + m.Load("processGroup", &x.processGroup) + m.Load("execed", &x.execed) + m.Load("mounts", &x.mounts) + m.Load("tty", &x.tty) + m.LoadValue("rscr", new(*RSEQCriticalRegion), func(y interface{}) { x.loadRscr(y.(*RSEQCriticalRegion)) }) +} + +func (x *itimerRealListener) beforeSave() {} +func (x *itimerRealListener) save(m state.Map) { + x.beforeSave() + m.Save("tg", &x.tg) +} + +func (x *itimerRealListener) afterLoad() {} +func (x *itimerRealListener) load(m state.Map) { + m.Load("tg", &x.tg) +} + +func (x *TaskSet) beforeSave() {} +func (x *TaskSet) save(m state.Map) { + x.beforeSave() + m.Save("Root", &x.Root) + m.Save("sessions", &x.sessions) +} + +func (x *TaskSet) afterLoad() {} +func (x *TaskSet) load(m state.Map) { + m.Load("Root", &x.Root) + m.Load("sessions", &x.sessions) +} + +func (x *PIDNamespace) beforeSave() {} +func (x *PIDNamespace) save(m state.Map) { + x.beforeSave() + m.Save("owner", &x.owner) + m.Save("parent", &x.parent) + m.Save("userns", &x.userns) + m.Save("last", &x.last) + m.Save("tasks", &x.tasks) + m.Save("tids", &x.tids) + m.Save("tgids", &x.tgids) + m.Save("sessions", &x.sessions) + m.Save("sids", &x.sids) + m.Save("processGroups", &x.processGroups) + m.Save("pgids", &x.pgids) + m.Save("exiting", &x.exiting) +} + +func (x *PIDNamespace) afterLoad() {} +func (x *PIDNamespace) load(m state.Map) { + m.Load("owner", &x.owner) + m.Load("parent", &x.parent) + m.Load("userns", &x.userns) + m.Load("last", &x.last) + m.Load("tasks", &x.tasks) + m.Load("tids", &x.tids) + m.Load("tgids", &x.tgids) + m.Load("sessions", &x.sessions) + m.Load("sids", &x.sids) + m.Load("processGroups", &x.processGroups) + m.Load("pgids", &x.pgids) + m.Load("exiting", &x.exiting) +} + +func (x *threadGroupNode) beforeSave() {} +func (x *threadGroupNode) save(m state.Map) { + x.beforeSave() + m.Save("pidns", &x.pidns) + m.Save("leader", &x.leader) + m.Save("execing", &x.execing) + m.Save("tasks", &x.tasks) + m.Save("tasksCount", &x.tasksCount) + m.Save("liveTasks", &x.liveTasks) + m.Save("activeTasks", &x.activeTasks) +} + +func (x *threadGroupNode) afterLoad() {} +func (x *threadGroupNode) load(m state.Map) { + m.Load("pidns", &x.pidns) + m.Load("leader", &x.leader) + m.Load("execing", &x.execing) + m.Load("tasks", &x.tasks) + m.Load("tasksCount", &x.tasksCount) + m.Load("liveTasks", &x.liveTasks) + m.Load("activeTasks", &x.activeTasks) +} + +func (x *taskNode) beforeSave() {} +func (x *taskNode) save(m state.Map) { + x.beforeSave() + m.Save("tg", &x.tg) + m.Save("taskEntry", &x.taskEntry) + m.Save("parent", &x.parent) + m.Save("children", &x.children) + m.Save("childPIDNamespace", &x.childPIDNamespace) +} + +func (x *taskNode) afterLoad() {} +func (x *taskNode) load(m state.Map) { + m.LoadWait("tg", &x.tg) + m.Load("taskEntry", &x.taskEntry) + m.Load("parent", &x.parent) + m.Load("children", &x.children) + m.Load("childPIDNamespace", &x.childPIDNamespace) +} + +func (x *Timekeeper) save(m state.Map) { + x.beforeSave() + m.Save("bootTime", &x.bootTime) + m.Save("saveMonotonic", &x.saveMonotonic) + m.Save("saveRealtime", &x.saveRealtime) + m.Save("params", &x.params) +} + +func (x *Timekeeper) load(m state.Map) { + m.Load("bootTime", &x.bootTime) + m.Load("saveMonotonic", &x.saveMonotonic) + m.Load("saveRealtime", &x.saveRealtime) + m.Load("params", &x.params) + m.AfterLoad(x.afterLoad) +} + +func (x *timekeeperClock) beforeSave() {} +func (x *timekeeperClock) save(m state.Map) { + x.beforeSave() + m.Save("tk", &x.tk) + m.Save("c", &x.c) +} + +func (x *timekeeperClock) afterLoad() {} +func (x *timekeeperClock) load(m state.Map) { + m.Load("tk", &x.tk) + m.Load("c", &x.c) +} + +func (x *TTY) beforeSave() {} +func (x *TTY) save(m state.Map) { + x.beforeSave() + m.Save("tg", &x.tg) +} + +func (x *TTY) afterLoad() {} +func (x *TTY) load(m state.Map) { + m.Load("tg", &x.tg) +} + +func (x *UTSNamespace) beforeSave() {} +func (x *UTSNamespace) save(m state.Map) { + x.beforeSave() + m.Save("hostName", &x.hostName) + m.Save("domainName", &x.domainName) + m.Save("userns", &x.userns) +} + +func (x *UTSNamespace) afterLoad() {} +func (x *UTSNamespace) load(m state.Map) { + m.Load("hostName", &x.hostName) + m.Load("domainName", &x.domainName) + m.Load("userns", &x.userns) +} + +func (x *VDSOParamPage) beforeSave() {} +func (x *VDSOParamPage) save(m state.Map) { + x.beforeSave() + m.Save("mfp", &x.mfp) + m.Save("fr", &x.fr) + m.Save("seq", &x.seq) +} + +func (x *VDSOParamPage) afterLoad() {} +func (x *VDSOParamPage) load(m state.Map) { + m.Load("mfp", &x.mfp) + m.Load("fr", &x.fr) + m.Load("seq", &x.seq) +} + +func init() { + state.Register("kernel.abstractEndpoint", (*abstractEndpoint)(nil), state.Fns{Save: (*abstractEndpoint).save, Load: (*abstractEndpoint).load}) + state.Register("kernel.AbstractSocketNamespace", (*AbstractSocketNamespace)(nil), state.Fns{Save: (*AbstractSocketNamespace).save, Load: (*AbstractSocketNamespace).load}) + state.Register("kernel.FDFlags", (*FDFlags)(nil), state.Fns{Save: (*FDFlags).save, Load: (*FDFlags).load}) + state.Register("kernel.descriptor", (*descriptor)(nil), state.Fns{Save: (*descriptor).save, Load: (*descriptor).load}) + state.Register("kernel.FDTable", (*FDTable)(nil), state.Fns{Save: (*FDTable).save, Load: (*FDTable).load}) + state.Register("kernel.FSContext", (*FSContext)(nil), state.Fns{Save: (*FSContext).save, Load: (*FSContext).load}) + state.Register("kernel.IPCNamespace", (*IPCNamespace)(nil), state.Fns{Save: (*IPCNamespace).save, Load: (*IPCNamespace).load}) + state.Register("kernel.Kernel", (*Kernel)(nil), state.Fns{Save: (*Kernel).save, Load: (*Kernel).load}) + state.Register("kernel.SocketEntry", (*SocketEntry)(nil), state.Fns{Save: (*SocketEntry).save, Load: (*SocketEntry).load}) + state.Register("kernel.pendingSignals", (*pendingSignals)(nil), state.Fns{Save: (*pendingSignals).save, Load: (*pendingSignals).load}) + state.Register("kernel.pendingSignalQueue", (*pendingSignalQueue)(nil), state.Fns{Save: (*pendingSignalQueue).save, Load: (*pendingSignalQueue).load}) + state.Register("kernel.pendingSignal", (*pendingSignal)(nil), state.Fns{Save: (*pendingSignal).save, Load: (*pendingSignal).load}) + state.Register("kernel.pendingSignalList", (*pendingSignalList)(nil), state.Fns{Save: (*pendingSignalList).save, Load: (*pendingSignalList).load}) + state.Register("kernel.pendingSignalEntry", (*pendingSignalEntry)(nil), state.Fns{Save: (*pendingSignalEntry).save, Load: (*pendingSignalEntry).load}) + state.Register("kernel.savedPendingSignal", (*savedPendingSignal)(nil), state.Fns{Save: (*savedPendingSignal).save, Load: (*savedPendingSignal).load}) + state.Register("kernel.IntervalTimer", (*IntervalTimer)(nil), state.Fns{Save: (*IntervalTimer).save, Load: (*IntervalTimer).load}) + state.Register("kernel.processGroupList", (*processGroupList)(nil), state.Fns{Save: (*processGroupList).save, Load: (*processGroupList).load}) + state.Register("kernel.processGroupEntry", (*processGroupEntry)(nil), state.Fns{Save: (*processGroupEntry).save, Load: (*processGroupEntry).load}) + state.Register("kernel.ptraceOptions", (*ptraceOptions)(nil), state.Fns{Save: (*ptraceOptions).save, Load: (*ptraceOptions).load}) + state.Register("kernel.ptraceStop", (*ptraceStop)(nil), state.Fns{Save: (*ptraceStop).save, Load: (*ptraceStop).load}) + state.Register("kernel.RSEQCriticalRegion", (*RSEQCriticalRegion)(nil), state.Fns{Save: (*RSEQCriticalRegion).save, Load: (*RSEQCriticalRegion).load}) + state.Register("kernel.sessionList", (*sessionList)(nil), state.Fns{Save: (*sessionList).save, Load: (*sessionList).load}) + state.Register("kernel.sessionEntry", (*sessionEntry)(nil), state.Fns{Save: (*sessionEntry).save, Load: (*sessionEntry).load}) + state.Register("kernel.Session", (*Session)(nil), state.Fns{Save: (*Session).save, Load: (*Session).load}) + state.Register("kernel.ProcessGroup", (*ProcessGroup)(nil), state.Fns{Save: (*ProcessGroup).save, Load: (*ProcessGroup).load}) + state.Register("kernel.SignalHandlers", (*SignalHandlers)(nil), state.Fns{Save: (*SignalHandlers).save, Load: (*SignalHandlers).load}) + state.Register("kernel.socketList", (*socketList)(nil), state.Fns{Save: (*socketList).save, Load: (*socketList).load}) + state.Register("kernel.socketEntry", (*socketEntry)(nil), state.Fns{Save: (*socketEntry).save, Load: (*socketEntry).load}) + state.Register("kernel.SyscallTable", (*SyscallTable)(nil), state.Fns{Save: (*SyscallTable).save, Load: (*SyscallTable).load}) + state.Register("kernel.syslog", (*syslog)(nil), state.Fns{Save: (*syslog).save, Load: (*syslog).load}) + state.Register("kernel.Task", (*Task)(nil), state.Fns{Save: (*Task).save, Load: (*Task).load}) + state.Register("kernel.runSyscallAfterPtraceEventClone", (*runSyscallAfterPtraceEventClone)(nil), state.Fns{Save: (*runSyscallAfterPtraceEventClone).save, Load: (*runSyscallAfterPtraceEventClone).load}) + state.Register("kernel.runSyscallAfterVforkStop", (*runSyscallAfterVforkStop)(nil), state.Fns{Save: (*runSyscallAfterVforkStop).save, Load: (*runSyscallAfterVforkStop).load}) + state.Register("kernel.vforkStop", (*vforkStop)(nil), state.Fns{Save: (*vforkStop).save, Load: (*vforkStop).load}) + state.Register("kernel.TaskContext", (*TaskContext)(nil), state.Fns{Save: (*TaskContext).save, Load: (*TaskContext).load}) + state.Register("kernel.execStop", (*execStop)(nil), state.Fns{Save: (*execStop).save, Load: (*execStop).load}) + state.Register("kernel.runSyscallAfterExecStop", (*runSyscallAfterExecStop)(nil), state.Fns{Save: (*runSyscallAfterExecStop).save, Load: (*runSyscallAfterExecStop).load}) + state.Register("kernel.ExitStatus", (*ExitStatus)(nil), state.Fns{Save: (*ExitStatus).save, Load: (*ExitStatus).load}) + state.Register("kernel.runExit", (*runExit)(nil), state.Fns{Save: (*runExit).save, Load: (*runExit).load}) + state.Register("kernel.runExitMain", (*runExitMain)(nil), state.Fns{Save: (*runExitMain).save, Load: (*runExitMain).load}) + state.Register("kernel.runExitNotify", (*runExitNotify)(nil), state.Fns{Save: (*runExitNotify).save, Load: (*runExitNotify).load}) + state.Register("kernel.taskList", (*taskList)(nil), state.Fns{Save: (*taskList).save, Load: (*taskList).load}) + state.Register("kernel.taskEntry", (*taskEntry)(nil), state.Fns{Save: (*taskEntry).save, Load: (*taskEntry).load}) + state.Register("kernel.runApp", (*runApp)(nil), state.Fns{Save: (*runApp).save, Load: (*runApp).load}) + state.Register("kernel.TaskGoroutineSchedInfo", (*TaskGoroutineSchedInfo)(nil), state.Fns{Save: (*TaskGoroutineSchedInfo).save, Load: (*TaskGoroutineSchedInfo).load}) + state.Register("kernel.taskClock", (*taskClock)(nil), state.Fns{Save: (*taskClock).save, Load: (*taskClock).load}) + state.Register("kernel.tgClock", (*tgClock)(nil), state.Fns{Save: (*tgClock).save, Load: (*tgClock).load}) + state.Register("kernel.groupStop", (*groupStop)(nil), state.Fns{Save: (*groupStop).save, Load: (*groupStop).load}) + state.Register("kernel.runInterrupt", (*runInterrupt)(nil), state.Fns{Save: (*runInterrupt).save, Load: (*runInterrupt).load}) + state.Register("kernel.runInterruptAfterSignalDeliveryStop", (*runInterruptAfterSignalDeliveryStop)(nil), state.Fns{Save: (*runInterruptAfterSignalDeliveryStop).save, Load: (*runInterruptAfterSignalDeliveryStop).load}) + state.Register("kernel.runSyscallAfterSyscallEnterStop", (*runSyscallAfterSyscallEnterStop)(nil), state.Fns{Save: (*runSyscallAfterSyscallEnterStop).save, Load: (*runSyscallAfterSyscallEnterStop).load}) + state.Register("kernel.runSyscallAfterSysemuStop", (*runSyscallAfterSysemuStop)(nil), state.Fns{Save: (*runSyscallAfterSysemuStop).save, Load: (*runSyscallAfterSysemuStop).load}) + state.Register("kernel.runSyscallReinvoke", (*runSyscallReinvoke)(nil), state.Fns{Save: (*runSyscallReinvoke).save, Load: (*runSyscallReinvoke).load}) + state.Register("kernel.runSyscallExit", (*runSyscallExit)(nil), state.Fns{Save: (*runSyscallExit).save, Load: (*runSyscallExit).load}) + state.Register("kernel.ThreadGroup", (*ThreadGroup)(nil), state.Fns{Save: (*ThreadGroup).save, Load: (*ThreadGroup).load}) + state.Register("kernel.itimerRealListener", (*itimerRealListener)(nil), state.Fns{Save: (*itimerRealListener).save, Load: (*itimerRealListener).load}) + state.Register("kernel.TaskSet", (*TaskSet)(nil), state.Fns{Save: (*TaskSet).save, Load: (*TaskSet).load}) + state.Register("kernel.PIDNamespace", (*PIDNamespace)(nil), state.Fns{Save: (*PIDNamespace).save, Load: (*PIDNamespace).load}) + state.Register("kernel.threadGroupNode", (*threadGroupNode)(nil), state.Fns{Save: (*threadGroupNode).save, Load: (*threadGroupNode).load}) + state.Register("kernel.taskNode", (*taskNode)(nil), state.Fns{Save: (*taskNode).save, Load: (*taskNode).load}) + state.Register("kernel.Timekeeper", (*Timekeeper)(nil), state.Fns{Save: (*Timekeeper).save, Load: (*Timekeeper).load}) + state.Register("kernel.timekeeperClock", (*timekeeperClock)(nil), state.Fns{Save: (*timekeeperClock).save, Load: (*timekeeperClock).load}) + state.Register("kernel.TTY", (*TTY)(nil), state.Fns{Save: (*TTY).save, Load: (*TTY).load}) + state.Register("kernel.UTSNamespace", (*UTSNamespace)(nil), state.Fns{Save: (*UTSNamespace).save, Load: (*UTSNamespace).load}) + state.Register("kernel.VDSOParamPage", (*VDSOParamPage)(nil), state.Fns{Save: (*VDSOParamPage).save, Load: (*VDSOParamPage).load}) +} diff --git a/pkg/sentry/kernel/memevent/BUILD b/pkg/sentry/kernel/memevent/BUILD deleted file mode 100644 index ebcfaa619..000000000 --- a/pkg/sentry/kernel/memevent/BUILD +++ /dev/null @@ -1,32 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") - -package(licenses = ["notice"]) - -go_library( - name = "memevent", - srcs = ["memory_events.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent", - visibility = ["//:sandbox"], - deps = [ - ":memory_events_go_proto", - "//pkg/eventchannel", - "//pkg/log", - "//pkg/metric", - "//pkg/sentry/kernel", - "//pkg/sentry/usage", - ], -) - -proto_library( - name = "memory_events_proto", - srcs = ["memory_events.proto"], - visibility = ["//visibility:public"], -) - -go_proto_library( - name = "memory_events_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto", - proto = ":memory_events_proto", - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/kernel/memevent/memory_events.go b/pkg/sentry/kernel/memevent/memory_events.go deleted file mode 100644 index b0d98e7f0..000000000 --- a/pkg/sentry/kernel/memevent/memory_events.go +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package memevent implements the memory usage events controller, which -// periodically emits events via the eventchannel. -package memevent - -import ( - "sync" - "time" - - "gvisor.dev/gvisor/pkg/eventchannel" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/metric" - "gvisor.dev/gvisor/pkg/sentry/kernel" - pb "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto" - "gvisor.dev/gvisor/pkg/sentry/usage" -) - -var totalTicks = metric.MustCreateNewUint64Metric("/memory_events/ticks", false /*sync*/, "Total number of memory event periods that have elapsed since startup.") -var totalEvents = metric.MustCreateNewUint64Metric("/memory_events/events", false /*sync*/, "Total number of memory events emitted.") - -// MemoryEvents describes the configuration for the global memory event emitter. -type MemoryEvents struct { - k *kernel.Kernel - - // The period is how often to emit an event. The memory events goroutine - // will ensure a minimum of one event is emitted per this period, regardless - // how of much memory usage has changed. - period time.Duration - - // Writing to this channel indicates the memory goroutine should stop. - stop chan struct{} - - // done is used to signal when the memory event goroutine has exited. - done sync.WaitGroup -} - -// New creates a new MemoryEvents. -func New(k *kernel.Kernel, period time.Duration) *MemoryEvents { - return &MemoryEvents{ - k: k, - period: period, - stop: make(chan struct{}), - } -} - -// Stop stops the memory usage events emitter goroutine. Stop must not be called -// concurrently with Start and may only be called once. -func (m *MemoryEvents) Stop() { - close(m.stop) - m.done.Wait() -} - -// Start starts the memory usage events emitter goroutine. Start must not be -// called concurrently with Stop and may only be called once. -func (m *MemoryEvents) Start() { - if m.period == 0 { - return - } - m.done.Add(1) - go m.run() // S/R-SAFE: doesn't interact with saved state. -} - -func (m *MemoryEvents) run() { - defer m.done.Done() - - // Emit the first event immediately on startup. - totalTicks.Increment() - m.emit() - - ticker := time.NewTicker(m.period) - defer ticker.Stop() - - for { - select { - case <-m.stop: - return - case <-ticker.C: - totalTicks.Increment() - m.emit() - } - } -} - -func (m *MemoryEvents) emit() { - totalPlatform, err := m.k.MemoryFile().TotalUsage() - if err != nil { - log.Warningf("Failed to fetch memory usage for memory events: %v", err) - return - } - snapshot, _ := usage.MemoryAccounting.Copy() - total := totalPlatform + snapshot.Mapped - - totalEvents.Increment() - eventchannel.Emit(&pb.MemoryUsageEvent{ - Mapped: snapshot.Mapped, - Total: total, - }) -} diff --git a/pkg/sentry/kernel/memevent/memory_events.proto b/pkg/sentry/kernel/memevent/memory_events.proto deleted file mode 100644 index bf8029ff5..000000000 --- a/pkg/sentry/kernel/memevent/memory_events.proto +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -// MemoryUsageEvent describes the memory usage of the sandbox at a single -// instant in time. These messages are emitted periodically on the eventchannel. -message MemoryUsageEvent { - // The total memory usage of the sandboxed application in bytes, calculated - // using the 'fast' method. - uint64 total = 1; - - // Memory used to back memory-mapped regions for files in the application, in - // bytes. This corresponds to the usage.MemoryKind.Mapped memory type. - uint64 mapped = 2; -} diff --git a/pkg/sentry/kernel/pending_signals_list.go b/pkg/sentry/kernel/pending_signals_list.go new file mode 100755 index 000000000..a3499371a --- /dev/null +++ b/pkg/sentry/kernel/pending_signals_list.go @@ -0,0 +1,173 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type pendingSignalElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (pendingSignalElementMapper) linkerFor(elem *pendingSignal) *pendingSignal { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type pendingSignalList struct { + head *pendingSignal + tail *pendingSignal +} + +// Reset resets list l to the empty state. +func (l *pendingSignalList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *pendingSignalList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *pendingSignalList) Front() *pendingSignal { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *pendingSignalList) Back() *pendingSignal { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *pendingSignalList) PushFront(e *pendingSignal) { + pendingSignalElementMapper{}.linkerFor(e).SetNext(l.head) + pendingSignalElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + pendingSignalElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *pendingSignalList) PushBack(e *pendingSignal) { + pendingSignalElementMapper{}.linkerFor(e).SetNext(nil) + pendingSignalElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + pendingSignalElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *pendingSignalList) PushBackList(m *pendingSignalList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + pendingSignalElementMapper{}.linkerFor(l.tail).SetNext(m.head) + pendingSignalElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *pendingSignalList) InsertAfter(b, e *pendingSignal) { + a := pendingSignalElementMapper{}.linkerFor(b).Next() + pendingSignalElementMapper{}.linkerFor(e).SetNext(a) + pendingSignalElementMapper{}.linkerFor(e).SetPrev(b) + pendingSignalElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + pendingSignalElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *pendingSignalList) InsertBefore(a, e *pendingSignal) { + b := pendingSignalElementMapper{}.linkerFor(a).Prev() + pendingSignalElementMapper{}.linkerFor(e).SetNext(a) + pendingSignalElementMapper{}.linkerFor(e).SetPrev(b) + pendingSignalElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + pendingSignalElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *pendingSignalList) Remove(e *pendingSignal) { + prev := pendingSignalElementMapper{}.linkerFor(e).Prev() + next := pendingSignalElementMapper{}.linkerFor(e).Next() + + if prev != nil { + pendingSignalElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + pendingSignalElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type pendingSignalEntry struct { + next *pendingSignal + prev *pendingSignal +} + +// Next returns the entry that follows e in the list. +func (e *pendingSignalEntry) Next() *pendingSignal { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *pendingSignalEntry) Prev() *pendingSignal { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *pendingSignalEntry) SetNext(elem *pendingSignal) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *pendingSignalEntry) SetPrev(elem *pendingSignal) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD deleted file mode 100644 index 4d15cca85..000000000 --- a/pkg/sentry/kernel/pipe/BUILD +++ /dev/null @@ -1,64 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "buffer_list", - out = "buffer_list.go", - package = "pipe", - prefix = "buffer", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*buffer", - "Linker": "*buffer", - }, -) - -go_library( - name = "pipe", - srcs = [ - "buffer.go", - "buffer_list.go", - "device.go", - "node.go", - "pipe.go", - "reader.go", - "reader_writer.go", - "writer.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/pipe", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/amutex", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/safemem", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "pipe_test", - size = "small", - srcs = [ - "buffer_test.go", - "node_test.go", - "pipe_test.go", - ], - embed = [":pipe"], - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/pipe/buffer_list.go b/pkg/sentry/kernel/pipe/buffer_list.go new file mode 100755 index 000000000..42ec78788 --- /dev/null +++ b/pkg/sentry/kernel/pipe/buffer_list.go @@ -0,0 +1,173 @@ +package pipe + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type bufferElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (bufferElementMapper) linkerFor(elem *buffer) *buffer { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type bufferList struct { + head *buffer + tail *buffer +} + +// Reset resets list l to the empty state. +func (l *bufferList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *bufferList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *bufferList) Front() *buffer { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *bufferList) Back() *buffer { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *bufferList) PushFront(e *buffer) { + bufferElementMapper{}.linkerFor(e).SetNext(l.head) + bufferElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + bufferElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *bufferList) PushBack(e *buffer) { + bufferElementMapper{}.linkerFor(e).SetNext(nil) + bufferElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + bufferElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *bufferList) PushBackList(m *bufferList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + bufferElementMapper{}.linkerFor(l.tail).SetNext(m.head) + bufferElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *bufferList) InsertAfter(b, e *buffer) { + a := bufferElementMapper{}.linkerFor(b).Next() + bufferElementMapper{}.linkerFor(e).SetNext(a) + bufferElementMapper{}.linkerFor(e).SetPrev(b) + bufferElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + bufferElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *bufferList) InsertBefore(a, e *buffer) { + b := bufferElementMapper{}.linkerFor(a).Prev() + bufferElementMapper{}.linkerFor(e).SetNext(a) + bufferElementMapper{}.linkerFor(e).SetPrev(b) + bufferElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + bufferElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *bufferList) Remove(e *buffer) { + prev := bufferElementMapper{}.linkerFor(e).Prev() + next := bufferElementMapper{}.linkerFor(e).Next() + + if prev != nil { + bufferElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + bufferElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type bufferEntry struct { + next *buffer + prev *buffer +} + +// Next returns the entry that follows e in the list. +func (e *bufferEntry) Next() *buffer { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *bufferEntry) Prev() *buffer { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *bufferEntry) SetNext(elem *buffer) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *bufferEntry) SetPrev(elem *buffer) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/pipe/buffer_test.go b/pkg/sentry/kernel/pipe/buffer_test.go deleted file mode 100644 index ee1b90115..000000000 --- a/pkg/sentry/kernel/pipe/buffer_test.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pipe - -import ( - "testing" - "unsafe" - - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -func TestBufferSize(t *testing.T) { - bufferSize := unsafe.Sizeof(buffer{}) - if bufferSize < usermem.PageSize { - t.Errorf("buffer is less than a page") - } - if bufferSize > (2 * usermem.PageSize) { - t.Errorf("buffer is greater than two pages") - } -} diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go deleted file mode 100644 index adbad7764..000000000 --- a/pkg/sentry/kernel/pipe/node_test.go +++ /dev/null @@ -1,320 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pipe - -import ( - "testing" - "time" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserror" -) - -type sleeper struct { - context.Context - ch chan struct{} -} - -func newSleeperContext(t *testing.T) context.Context { - return &sleeper{ - Context: contexttest.Context(t), - ch: make(chan struct{}), - } -} - -func (s *sleeper) SleepStart() <-chan struct{} { - return s.ch -} - -func (s *sleeper) SleepFinish(bool) { -} - -func (s *sleeper) Cancel() { - s.ch <- struct{}{} -} - -func (s *sleeper) Interrupted() bool { - return len(s.ch) != 0 -} - -type openResult struct { - *fs.File - error -} - -var perms fs.FilePermissions = fs.FilePermissions{ - User: fs.PermMask{Read: true, Write: true}, -} - -func testOpenOrDie(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, doneChan chan<- struct{}) (*fs.File, error) { - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - d := fs.NewDirent(ctx, inode, "pipe") - file, err := n.GetFile(ctx, d, flags) - if err != nil { - t.Fatalf("open with flags %+v failed: %v", flags, err) - } - if doneChan != nil { - doneChan <- struct{}{} - } - return file, err -} - -func testOpen(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, resChan chan<- openResult) (*fs.File, error) { - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - d := fs.NewDirent(ctx, inode, "pipe") - file, err := n.GetFile(ctx, d, flags) - if resChan != nil { - resChan <- openResult{file, err} - } - return file, err -} - -func newNamedPipe(t *testing.T) *Pipe { - return NewPipe(contexttest.Context(t), true, DefaultPipeSize, usermem.PageSize) -} - -func newAnonPipe(t *testing.T) *Pipe { - return NewPipe(contexttest.Context(t), false, DefaultPipeSize, usermem.PageSize) -} - -// assertRecvBlocks ensures that a recv attempt on c blocks for at least -// blockDuration. This is useful for checking that a goroutine that is supposed -// to be executing a blocking operation is actually blocking. -func assertRecvBlocks(t *testing.T, c <-chan struct{}, blockDuration time.Duration, failMsg string) { - select { - case <-c: - t.Fatalf(failMsg) - case <-time.After(blockDuration): - // Ok, blocked for the required duration. - } -} - -func TestReadOpenBlocksForWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - // Verify that the open for read is blocking. - assertRecvBlocks(t, rDone, time.Millisecond*100, - "open for read not blocking with no writers") - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - <-wDone - <-rDone -} - -func TestWriteOpenBlocksForReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - // Verify that the open for write is blocking - assertRecvBlocks(t, wDone, time.Millisecond*100, - "open for write not blocking with no readers") - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - <-rDone - <-wDone -} - -func TestMultipleWriteOpenDoesntCountAsReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone1 := make(chan struct{}) - rDone2 := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone1) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone2) - - assertRecvBlocks(t, rDone1, time.Millisecond*100, - "open for read didn't block with no writers") - assertRecvBlocks(t, rDone2, time.Millisecond*100, - "open for read didn't block with no writers") - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - <-wDone - <-rDone2 - <-rDone1 -} - -func TestClosedReaderBlocksWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rFile, _ := testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, NonBlocking: true}, nil) - rFile.DecRef() - - wDone := make(chan struct{}) - // This open for write should block because the reader is now gone. - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - assertRecvBlocks(t, wDone, time.Millisecond*100, - "open for write didn't block with no concurrent readers") - - // Open for read again. This should unblock the open for write. - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - <-rDone - <-wDone -} - -func TestReadWriteOpenNeverBlocks(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rwDone := make(chan struct{}) - // Open for read-write never wait for a reader or writer, even if the - // nonblocking flag is not set. - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, Write: true, NonBlocking: false}, rwDone) - <-rwDone -} - -func TestReadWriteOpenUnblocksReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - rwDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, Write: true}, rwDone) - - <-rwDone - <-rDone -} - -func TestReadWriteOpenUnblocksWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - rwDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, Write: true}, rwDone) - - <-rwDone - <-wDone -} - -func TestBlockedOpenIsCancellable(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - done := make(chan openResult) - go testOpen(ctx, t, f, fs.FileFlags{Read: true}, done) - select { - case <-done: - t.Fatalf("open for read didn't block with no writers") - case <-time.After(time.Millisecond * 100): - // Ok. - } - - ctx.(*sleeper).Cancel() - // If the cancel on the sleeper didn't work, the open for read would never - // return. - res := <-done - if res.error != syserror.ErrInterrupted { - t.Fatalf("Cancellation didn't cause GetFile to return fs.ErrInterrupted, got %v.", - res.error) - } -} - -func TestNonblockingReadOpenFileNoWriters(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Read: true, NonBlocking: true}, nil); err != nil { - t.Fatalf("Nonblocking open for read failed with error %v.", err) - } -} - -func TestNonblockingWriteOpenFileNoReaders(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true, NonBlocking: true}, nil); err != syserror.ENXIO { - t.Fatalf("Nonblocking open for write failed unexpected error %v.", err) - } -} - -func TestNonBlockingReadOpenWithWriter(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - wDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Write: true}, wDone) - - // Open for write blocks since there are no readers yet. - assertRecvBlocks(t, wDone, time.Millisecond*100, - "Open for write didn't block with no reader.") - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Read: true, NonBlocking: true}, nil); err != nil { - t.Fatalf("Nonblocking open for read failed with error %v.", err) - } - - // Open for write should now be unblocked. - <-wDone -} - -func TestNonBlockingWriteOpenWithReader(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - - rDone := make(chan struct{}) - go testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true}, rDone) - - // Open for write blocked, since no reader yet. - assertRecvBlocks(t, rDone, time.Millisecond*100, - "Open for reader didn't block with no writer.") - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true, NonBlocking: true}, nil); err != nil { - t.Fatalf("Nonblocking open for write failed with error %v.", err) - } - - // Open for write should now be unblocked. - <-rDone -} - -func TestAnonReadOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newAnonPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Read: true}, nil); err != nil { - t.Fatalf("open anon pipe for read failed: %v", err) - } -} - -func TestAnonWriteOpen(t *testing.T) { - ctx := newSleeperContext(t) - f := NewInodeOperations(ctx, perms, newAnonPipe(t)) - - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true}, nil); err != nil { - t.Fatalf("open anon pipe for write failed: %v", err) - } -} diff --git a/pkg/sentry/kernel/pipe/pipe_state_autogen.go b/pkg/sentry/kernel/pipe/pipe_state_autogen.go new file mode 100755 index 000000000..cad035789 --- /dev/null +++ b/pkg/sentry/kernel/pipe/pipe_state_autogen.go @@ -0,0 +1,132 @@ +// automatically generated by stateify. + +package pipe + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *buffer) beforeSave() {} +func (x *buffer) save(m state.Map) { + x.beforeSave() + m.Save("data", &x.data) + m.Save("read", &x.read) + m.Save("write", &x.write) + m.Save("bufferEntry", &x.bufferEntry) +} + +func (x *buffer) afterLoad() {} +func (x *buffer) load(m state.Map) { + m.Load("data", &x.data) + m.Load("read", &x.read) + m.Load("write", &x.write) + m.Load("bufferEntry", &x.bufferEntry) +} + +func (x *bufferList) beforeSave() {} +func (x *bufferList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *bufferList) afterLoad() {} +func (x *bufferList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *bufferEntry) beforeSave() {} +func (x *bufferEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *bufferEntry) afterLoad() {} +func (x *bufferEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func (x *inodeOperations) beforeSave() {} +func (x *inodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("p", &x.p) +} + +func (x *inodeOperations) afterLoad() {} +func (x *inodeOperations) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("p", &x.p) +} + +func (x *Pipe) beforeSave() {} +func (x *Pipe) save(m state.Map) { + x.beforeSave() + m.Save("isNamed", &x.isNamed) + m.Save("atomicIOBytes", &x.atomicIOBytes) + m.Save("readers", &x.readers) + m.Save("writers", &x.writers) + m.Save("data", &x.data) + m.Save("max", &x.max) + m.Save("size", &x.size) + m.Save("hadWriter", &x.hadWriter) +} + +func (x *Pipe) afterLoad() {} +func (x *Pipe) load(m state.Map) { + m.Load("isNamed", &x.isNamed) + m.Load("atomicIOBytes", &x.atomicIOBytes) + m.Load("readers", &x.readers) + m.Load("writers", &x.writers) + m.Load("data", &x.data) + m.Load("max", &x.max) + m.Load("size", &x.size) + m.Load("hadWriter", &x.hadWriter) +} + +func (x *Reader) beforeSave() {} +func (x *Reader) save(m state.Map) { + x.beforeSave() + m.Save("ReaderWriter", &x.ReaderWriter) +} + +func (x *Reader) afterLoad() {} +func (x *Reader) load(m state.Map) { + m.Load("ReaderWriter", &x.ReaderWriter) +} + +func (x *ReaderWriter) beforeSave() {} +func (x *ReaderWriter) save(m state.Map) { + x.beforeSave() + m.Save("Pipe", &x.Pipe) +} + +func (x *ReaderWriter) afterLoad() {} +func (x *ReaderWriter) load(m state.Map) { + m.Load("Pipe", &x.Pipe) +} + +func (x *Writer) beforeSave() {} +func (x *Writer) save(m state.Map) { + x.beforeSave() + m.Save("ReaderWriter", &x.ReaderWriter) +} + +func (x *Writer) afterLoad() {} +func (x *Writer) load(m state.Map) { + m.Load("ReaderWriter", &x.ReaderWriter) +} + +func init() { + state.Register("pipe.buffer", (*buffer)(nil), state.Fns{Save: (*buffer).save, Load: (*buffer).load}) + state.Register("pipe.bufferList", (*bufferList)(nil), state.Fns{Save: (*bufferList).save, Load: (*bufferList).load}) + state.Register("pipe.bufferEntry", (*bufferEntry)(nil), state.Fns{Save: (*bufferEntry).save, Load: (*bufferEntry).load}) + state.Register("pipe.inodeOperations", (*inodeOperations)(nil), state.Fns{Save: (*inodeOperations).save, Load: (*inodeOperations).load}) + state.Register("pipe.Pipe", (*Pipe)(nil), state.Fns{Save: (*Pipe).save, Load: (*Pipe).load}) + state.Register("pipe.Reader", (*Reader)(nil), state.Fns{Save: (*Reader).save, Load: (*Reader).load}) + state.Register("pipe.ReaderWriter", (*ReaderWriter)(nil), state.Fns{Save: (*ReaderWriter).save, Load: (*ReaderWriter).load}) + state.Register("pipe.Writer", (*Writer)(nil), state.Fns{Save: (*Writer).save, Load: (*Writer).load}) +} diff --git a/pkg/sentry/kernel/pipe/pipe_test.go b/pkg/sentry/kernel/pipe/pipe_test.go deleted file mode 100644 index e3a14b665..000000000 --- a/pkg/sentry/kernel/pipe/pipe_test.go +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pipe - -import ( - "bytes" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestPipeRW(t *testing.T) { - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, 65536, 4096) - defer r.DecRef() - defer w.DecRef() - - msg := []byte("here's some bytes") - wantN := int64(len(msg)) - n, err := w.Writev(ctx, usermem.BytesIOSequence(msg)) - if n != wantN || err != nil { - t.Fatalf("Writev: got (%d, %v), wanted (%d, nil)", n, err, wantN) - } - - buf := make([]byte, len(msg)) - n, err = r.Readv(ctx, usermem.BytesIOSequence(buf)) - if n != wantN || err != nil || !bytes.Equal(buf, msg) { - t.Fatalf("Readv: got (%d, %v) %q, wanted (%d, nil) %q", n, err, buf, wantN, msg) - } -} - -func TestPipeReadBlock(t *testing.T) { - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, 65536, 4096) - defer r.DecRef() - defer w.DecRef() - - n, err := r.Readv(ctx, usermem.BytesIOSequence(make([]byte, 1))) - if n != 0 || err != syserror.ErrWouldBlock { - t.Fatalf("Readv: got (%d, %v), wanted (0, %v)", n, err, syserror.ErrWouldBlock) - } -} - -func TestPipeWriteBlock(t *testing.T) { - const atomicIOBytes = 2 - const capacity = MinimumPipeSize - - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, capacity, atomicIOBytes) - defer r.DecRef() - defer w.DecRef() - - msg := make([]byte, capacity+1) - n, err := w.Writev(ctx, usermem.BytesIOSequence(msg)) - if wantN, wantErr := int64(capacity), syserror.ErrWouldBlock; n != wantN || err != wantErr { - t.Fatalf("Writev: got (%d, %v), wanted (%d, %v)", n, err, wantN, wantErr) - } -} - -func TestPipeWriteUntilEnd(t *testing.T) { - const atomicIOBytes = 2 - - ctx := contexttest.Context(t) - r, w := NewConnectedPipe(ctx, atomicIOBytes, atomicIOBytes) - defer r.DecRef() - defer w.DecRef() - - msg := []byte("here's some bytes") - - wDone := make(chan struct{}, 0) - rDone := make(chan struct{}, 0) - defer func() { - // Signal the reader to stop and wait until it does so. - close(wDone) - <-rDone - }() - - go func() { - defer close(rDone) - // Read from r until done is closed. - ctx := contexttest.Context(t) - buf := make([]byte, len(msg)+1) - dst := usermem.BytesIOSequence(buf) - e, ch := waiter.NewChannelEntry(nil) - r.EventRegister(&e, waiter.EventIn) - defer r.EventUnregister(&e) - for { - n, err := r.Readv(ctx, dst) - dst = dst.DropFirst64(n) - if err == syserror.ErrWouldBlock { - select { - case <-ch: - continue - case <-wDone: - // We expect to have 1 byte left in dst since len(buf) == - // len(msg)+1. - if dst.NumBytes() != 1 || !bytes.Equal(buf[:len(msg)], msg) { - t.Errorf("Reader: got %q (%d bytes remaining), wanted %q", buf, dst.NumBytes(), msg) - } - return - } - } - if err != nil { - t.Fatalf("Readv: got unexpected error %v", err) - } - } - }() - - src := usermem.BytesIOSequence(msg) - e, ch := waiter.NewChannelEntry(nil) - w.EventRegister(&e, waiter.EventOut) - defer w.EventUnregister(&e) - for src.NumBytes() != 0 { - n, err := w.Writev(ctx, src) - src = src.DropFirst64(n) - if err == syserror.ErrWouldBlock { - <-ch - continue - } - if err != nil { - t.Fatalf("Writev: got (%d, %v)", n, err) - } - } -} diff --git a/pkg/sentry/kernel/process_group_list.go b/pkg/sentry/kernel/process_group_list.go new file mode 100755 index 000000000..853145237 --- /dev/null +++ b/pkg/sentry/kernel/process_group_list.go @@ -0,0 +1,173 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type processGroupElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (processGroupElementMapper) linkerFor(elem *ProcessGroup) *ProcessGroup { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type processGroupList struct { + head *ProcessGroup + tail *ProcessGroup +} + +// Reset resets list l to the empty state. +func (l *processGroupList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *processGroupList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *processGroupList) Front() *ProcessGroup { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *processGroupList) Back() *ProcessGroup { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *processGroupList) PushFront(e *ProcessGroup) { + processGroupElementMapper{}.linkerFor(e).SetNext(l.head) + processGroupElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + processGroupElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *processGroupList) PushBack(e *ProcessGroup) { + processGroupElementMapper{}.linkerFor(e).SetNext(nil) + processGroupElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + processGroupElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *processGroupList) PushBackList(m *processGroupList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + processGroupElementMapper{}.linkerFor(l.tail).SetNext(m.head) + processGroupElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *processGroupList) InsertAfter(b, e *ProcessGroup) { + a := processGroupElementMapper{}.linkerFor(b).Next() + processGroupElementMapper{}.linkerFor(e).SetNext(a) + processGroupElementMapper{}.linkerFor(e).SetPrev(b) + processGroupElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + processGroupElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *processGroupList) InsertBefore(a, e *ProcessGroup) { + b := processGroupElementMapper{}.linkerFor(a).Prev() + processGroupElementMapper{}.linkerFor(e).SetNext(a) + processGroupElementMapper{}.linkerFor(e).SetPrev(b) + processGroupElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + processGroupElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *processGroupList) Remove(e *ProcessGroup) { + prev := processGroupElementMapper{}.linkerFor(e).Prev() + next := processGroupElementMapper{}.linkerFor(e).Next() + + if prev != nil { + processGroupElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + processGroupElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type processGroupEntry struct { + next *ProcessGroup + prev *ProcessGroup +} + +// Next returns the entry that follows e in the list. +func (e *processGroupEntry) Next() *ProcessGroup { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *processGroupEntry) Prev() *ProcessGroup { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *processGroupEntry) SetNext(elem *ProcessGroup) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *processGroupEntry) SetPrev(elem *ProcessGroup) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/sched/BUILD b/pkg/sentry/kernel/sched/BUILD deleted file mode 100644 index 1725b8562..000000000 --- a/pkg/sentry/kernel/sched/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "sched", - srcs = [ - "cpuset.go", - "sched.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/sched", - visibility = ["//pkg/sentry:internal"], -) - -go_test( - name = "sched_test", - size = "small", - srcs = ["cpuset_test.go"], - embed = [":sched"], -) diff --git a/pkg/sentry/kernel/sched/cpuset_test.go b/pkg/sentry/kernel/sched/cpuset_test.go deleted file mode 100644 index 3af9f1197..000000000 --- a/pkg/sentry/kernel/sched/cpuset_test.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package sched - -import ( - "testing" -) - -func TestNumCPUs(t *testing.T) { - for i := uint(0); i < 1024; i++ { - c := NewCPUSet(i) - for j := uint(0); j < i; j++ { - c.Set(j) - } - n := c.NumCPUs() - if n != i { - t.Errorf("got wrong number of cpus %d, want %d", n, i) - } - } -} - -func TestClearAbove(t *testing.T) { - const n = 1024 - c := NewFullCPUSet(n) - for i := uint(0); i < n; i++ { - cpu := n - i - c.ClearAbove(cpu) - if got := c.NumCPUs(); got != cpu { - t.Errorf("iteration %d: got %d cpus, wanted %d", i, got, cpu) - } - } -} diff --git a/pkg/sentry/kernel/sched/sched_state_autogen.go b/pkg/sentry/kernel/sched/sched_state_autogen.go new file mode 100755 index 000000000..2a482732e --- /dev/null +++ b/pkg/sentry/kernel/sched/sched_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package sched + diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD deleted file mode 100644 index 36edf10f3..000000000 --- a/pkg/sentry/kernel/semaphore/BUILD +++ /dev/null @@ -1,49 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "waiter_list", - out = "waiter_list.go", - package = "semaphore", - prefix = "waiter", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*waiter", - "Linker": "*waiter", - }, -) - -go_library( - name = "semaphore", - srcs = [ - "semaphore.go", - "waiter_list.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/log", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/syserror", - ], -) - -go_test( - name = "semaphore_test", - size = "small", - srcs = ["semaphore_test.go"], - embed = [":semaphore"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/kernel/auth", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go b/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go new file mode 100755 index 000000000..f225f6f75 --- /dev/null +++ b/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go @@ -0,0 +1,115 @@ +// automatically generated by stateify. + +package semaphore + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *Registry) beforeSave() {} +func (x *Registry) save(m state.Map) { + x.beforeSave() + m.Save("userNS", &x.userNS) + m.Save("semaphores", &x.semaphores) + m.Save("lastIDUsed", &x.lastIDUsed) +} + +func (x *Registry) afterLoad() {} +func (x *Registry) load(m state.Map) { + m.Load("userNS", &x.userNS) + m.Load("semaphores", &x.semaphores) + m.Load("lastIDUsed", &x.lastIDUsed) +} + +func (x *Set) beforeSave() {} +func (x *Set) save(m state.Map) { + x.beforeSave() + m.Save("registry", &x.registry) + m.Save("ID", &x.ID) + m.Save("key", &x.key) + m.Save("creator", &x.creator) + m.Save("owner", &x.owner) + m.Save("perms", &x.perms) + m.Save("opTime", &x.opTime) + m.Save("changeTime", &x.changeTime) + m.Save("sems", &x.sems) + m.Save("dead", &x.dead) +} + +func (x *Set) afterLoad() {} +func (x *Set) load(m state.Map) { + m.Load("registry", &x.registry) + m.Load("ID", &x.ID) + m.Load("key", &x.key) + m.Load("creator", &x.creator) + m.Load("owner", &x.owner) + m.Load("perms", &x.perms) + m.Load("opTime", &x.opTime) + m.Load("changeTime", &x.changeTime) + m.Load("sems", &x.sems) + m.Load("dead", &x.dead) +} + +func (x *sem) beforeSave() {} +func (x *sem) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.waiters) { m.Failf("waiters is %v, expected zero", x.waiters) } + m.Save("value", &x.value) + m.Save("pid", &x.pid) +} + +func (x *sem) afterLoad() {} +func (x *sem) load(m state.Map) { + m.Load("value", &x.value) + m.Load("pid", &x.pid) +} + +func (x *waiter) beforeSave() {} +func (x *waiter) save(m state.Map) { + x.beforeSave() + m.Save("waiterEntry", &x.waiterEntry) + m.Save("value", &x.value) + m.Save("ch", &x.ch) +} + +func (x *waiter) afterLoad() {} +func (x *waiter) load(m state.Map) { + m.Load("waiterEntry", &x.waiterEntry) + m.Load("value", &x.value) + m.Load("ch", &x.ch) +} + +func (x *waiterList) beforeSave() {} +func (x *waiterList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *waiterList) afterLoad() {} +func (x *waiterList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *waiterEntry) beforeSave() {} +func (x *waiterEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *waiterEntry) afterLoad() {} +func (x *waiterEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func init() { + state.Register("semaphore.Registry", (*Registry)(nil), state.Fns{Save: (*Registry).save, Load: (*Registry).load}) + state.Register("semaphore.Set", (*Set)(nil), state.Fns{Save: (*Set).save, Load: (*Set).load}) + state.Register("semaphore.sem", (*sem)(nil), state.Fns{Save: (*sem).save, Load: (*sem).load}) + state.Register("semaphore.waiter", (*waiter)(nil), state.Fns{Save: (*waiter).save, Load: (*waiter).load}) + state.Register("semaphore.waiterList", (*waiterList)(nil), state.Fns{Save: (*waiterList).save, Load: (*waiterList).load}) + state.Register("semaphore.waiterEntry", (*waiterEntry)(nil), state.Fns{Save: (*waiterEntry).save, Load: (*waiterEntry).load}) +} diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go deleted file mode 100644 index c235f6ca4..000000000 --- a/pkg/sentry/kernel/semaphore/semaphore_test.go +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package semaphore - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" -) - -func executeOps(ctx context.Context, t *testing.T, set *Set, ops []linux.Sembuf, block bool) chan struct{} { - ch, _, err := set.executeOps(ctx, ops, 123) - if err != nil { - t.Fatalf("ExecuteOps(ops) failed, err: %v, ops: %+v", err, ops) - } - if block { - if ch == nil { - t.Fatalf("ExecuteOps(ops) got: nil, expected: !nil, ops: %+v", ops) - } - if signalled(ch) { - t.Fatalf("ExecuteOps(ops) channel should not have been signalled, ops: %+v", ops) - } - } else { - if ch != nil { - t.Fatalf("ExecuteOps(ops) got: %v, expected: nil, ops: %+v", ch, ops) - } - } - return ch -} - -func signalled(ch chan struct{}) bool { - select { - case <-ch: - return true - default: - return false - } -} - -func TestBasic(t *testing.T) { - ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} - ops := []linux.Sembuf{ - {SemOp: 1}, - } - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -1 - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -1 - ch1 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 1 - executeOps(ctx, t, set, ops, false) - if !signalled(ch1) { - t.Fatalf("ExecuteOps(ops) channel should not have been signalled, ops: %+v", ops) - } -} - -func TestWaitForZero(t *testing.T) { - ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} - ops := []linux.Sembuf{ - {SemOp: 0}, - } - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -2 - ch1 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 0 - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = 1 - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = 0 - chZero1 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 0 - chZero2 := executeOps(ctx, t, set, ops, true) - - ops[0].SemOp = 1 - executeOps(ctx, t, set, ops, false) - if !signalled(ch1) { - t.Fatalf("ExecuteOps(ops) channel should have been signalled, ops: %+v, set: %+v", ops, set) - } - - ops[0].SemOp = -2 - executeOps(ctx, t, set, ops, false) - if !signalled(chZero1) { - t.Fatalf("ExecuteOps(ops) channel zero 1 should have been signalled, ops: %+v, set: %+v", ops, set) - } - if !signalled(chZero2) { - t.Fatalf("ExecuteOps(ops) channel zero 2 should have been signalled, ops: %+v, set: %+v", ops, set) - } -} - -func TestNoWait(t *testing.T) { - ctx := contexttest.Context(t) - set := &Set{ID: 123, sems: make([]sem, 1)} - ops := []linux.Sembuf{ - {SemOp: 1}, - } - executeOps(ctx, t, set, ops, false) - - ops[0].SemOp = -2 - ops[0].SemFlg = linux.IPC_NOWAIT - if _, _, err := set.executeOps(ctx, ops, 123); err != syserror.ErrWouldBlock { - t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, syserror.ErrWouldBlock) - } - - ops[0].SemOp = 0 - ops[0].SemFlg = linux.IPC_NOWAIT - if _, _, err := set.executeOps(ctx, ops, 123); err != syserror.ErrWouldBlock { - t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, syserror.ErrWouldBlock) - } -} - -func TestUnregister(t *testing.T) { - ctx := contexttest.Context(t) - r := NewRegistry(auth.NewRootUserNamespace()) - set, err := r.FindOrCreate(ctx, 123, 2, linux.FileMode(0x600), true, true, true) - if err != nil { - t.Fatalf("FindOrCreate() failed, err: %v", err) - } - if got := r.FindByID(set.ID); got.ID != set.ID { - t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.ID, got, set) - } - - ops := []linux.Sembuf{ - {SemOp: -1}, - } - chs := make([]chan struct{}, 0, 5) - for i := 0; i < 5; i++ { - ch := executeOps(ctx, t, set, ops, true) - chs = append(chs, ch) - } - - creds := auth.CredentialsFromContext(ctx) - if err := r.RemoveID(set.ID, creds); err != nil { - t.Fatalf("RemoveID(%d) failed, err: %v", set.ID, err) - } - if !set.dead { - t.Fatalf("set is not dead: %+v", set) - } - if got := r.FindByID(set.ID); got != nil { - t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.ID, got) - } - for i, ch := range chs { - if !signalled(ch) { - t.Fatalf("channel %d should have been signalled", i) - } - } -} diff --git a/pkg/sentry/kernel/semaphore/waiter_list.go b/pkg/sentry/kernel/semaphore/waiter_list.go new file mode 100755 index 000000000..33e29fb55 --- /dev/null +++ b/pkg/sentry/kernel/semaphore/waiter_list.go @@ -0,0 +1,173 @@ +package semaphore + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type waiterElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (waiterElementMapper) linkerFor(elem *waiter) *waiter { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type waiterList struct { + head *waiter + tail *waiter +} + +// Reset resets list l to the empty state. +func (l *waiterList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *waiterList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *waiterList) Front() *waiter { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *waiterList) Back() *waiter { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *waiterList) PushFront(e *waiter) { + waiterElementMapper{}.linkerFor(e).SetNext(l.head) + waiterElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + waiterElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *waiterList) PushBack(e *waiter) { + waiterElementMapper{}.linkerFor(e).SetNext(nil) + waiterElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *waiterList) PushBackList(m *waiterList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + waiterElementMapper{}.linkerFor(l.tail).SetNext(m.head) + waiterElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *waiterList) InsertAfter(b, e *waiter) { + a := waiterElementMapper{}.linkerFor(b).Next() + waiterElementMapper{}.linkerFor(e).SetNext(a) + waiterElementMapper{}.linkerFor(e).SetPrev(b) + waiterElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + waiterElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *waiterList) InsertBefore(a, e *waiter) { + b := waiterElementMapper{}.linkerFor(a).Prev() + waiterElementMapper{}.linkerFor(e).SetNext(a) + waiterElementMapper{}.linkerFor(e).SetPrev(b) + waiterElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + waiterElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *waiterList) Remove(e *waiter) { + prev := waiterElementMapper{}.linkerFor(e).Prev() + next := waiterElementMapper{}.linkerFor(e).Next() + + if prev != nil { + waiterElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + waiterElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type waiterEntry struct { + next *waiter + prev *waiter +} + +// Next returns the entry that follows e in the list. +func (e *waiterEntry) Next() *waiter { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *waiterEntry) Prev() *waiter { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *waiterEntry) SetNext(elem *waiter) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *waiterEntry) SetPrev(elem *waiter) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go new file mode 100755 index 000000000..25ad17a4e --- /dev/null +++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go @@ -0,0 +1,55 @@ +package kernel + +import ( + "fmt" + "reflect" + "strings" + "unsafe" + + "gvisor.dev/gvisor/third_party/gvsync" +) + +// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race +// with any writer critical sections in sc. +func SeqAtomicLoadTaskGoroutineSchedInfo(sc *gvsync.SeqCount, ptr *TaskGoroutineSchedInfo) TaskGoroutineSchedInfo { + // This function doesn't use SeqAtomicTryLoad because doing so is + // measurably, significantly (~20%) slower; Go is awful at inlining. + var val TaskGoroutineSchedInfo + for { + epoch := sc.BeginRead() + if gvsync.RaceEnabled { + + gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + + val = *ptr + } + if sc.ReadOk(epoch) { + break + } + } + return val +} + +// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section +// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read +// would race with a writer critical section, SeqAtomicTryLoad returns +// (unspecified, false). +func SeqAtomicTryLoadTaskGoroutineSchedInfo(sc *gvsync.SeqCount, epoch gvsync.SeqCountEpoch, ptr *TaskGoroutineSchedInfo) (TaskGoroutineSchedInfo, bool) { + var val TaskGoroutineSchedInfo + if gvsync.RaceEnabled { + gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + val = *ptr + } + return val, sc.ReadOk(epoch) +} + +func initTaskGoroutineSchedInfo() { + var val TaskGoroutineSchedInfo + typ := reflect.TypeOf(val) + name := typ.Name() + if ptrs := gvsync.PointersInType(typ, name); len(ptrs) != 0 { + panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, name, typ, strings.Join(ptrs, "\n"))) + } +} diff --git a/pkg/sentry/kernel/session_list.go b/pkg/sentry/kernel/session_list.go new file mode 100755 index 000000000..9ba27b164 --- /dev/null +++ b/pkg/sentry/kernel/session_list.go @@ -0,0 +1,173 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type sessionElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (sessionElementMapper) linkerFor(elem *Session) *Session { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type sessionList struct { + head *Session + tail *Session +} + +// Reset resets list l to the empty state. +func (l *sessionList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *sessionList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *sessionList) Front() *Session { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *sessionList) Back() *Session { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *sessionList) PushFront(e *Session) { + sessionElementMapper{}.linkerFor(e).SetNext(l.head) + sessionElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + sessionElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *sessionList) PushBack(e *Session) { + sessionElementMapper{}.linkerFor(e).SetNext(nil) + sessionElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + sessionElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *sessionList) PushBackList(m *sessionList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + sessionElementMapper{}.linkerFor(l.tail).SetNext(m.head) + sessionElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *sessionList) InsertAfter(b, e *Session) { + a := sessionElementMapper{}.linkerFor(b).Next() + sessionElementMapper{}.linkerFor(e).SetNext(a) + sessionElementMapper{}.linkerFor(e).SetPrev(b) + sessionElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + sessionElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *sessionList) InsertBefore(a, e *Session) { + b := sessionElementMapper{}.linkerFor(a).Prev() + sessionElementMapper{}.linkerFor(e).SetNext(a) + sessionElementMapper{}.linkerFor(e).SetPrev(b) + sessionElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + sessionElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *sessionList) Remove(e *Session) { + prev := sessionElementMapper{}.linkerFor(e).Prev() + next := sessionElementMapper{}.linkerFor(e).Next() + + if prev != nil { + sessionElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + sessionElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type sessionEntry struct { + next *Session + prev *Session +} + +// Next returns the entry that follows e in the list. +func (e *sessionEntry) Next() *Session { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *sessionEntry) Prev() *Session { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *sessionEntry) SetNext(elem *Session) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *sessionEntry) SetPrev(elem *Session) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD deleted file mode 100644 index aa7471eb6..000000000 --- a/pkg/sentry/kernel/shm/BUILD +++ /dev/null @@ -1,29 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "shm", - srcs = [ - "device.go", - "shm.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/shm", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/log", - "//pkg/refs", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/kernel/shm/shm_state_autogen.go b/pkg/sentry/kernel/shm/shm_state_autogen.go new file mode 100755 index 000000000..4fd44bb6a --- /dev/null +++ b/pkg/sentry/kernel/shm/shm_state_autogen.go @@ -0,0 +1,74 @@ +// automatically generated by stateify. + +package shm + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *Registry) beforeSave() {} +func (x *Registry) save(m state.Map) { + x.beforeSave() + m.Save("userNS", &x.userNS) + m.Save("shms", &x.shms) + m.Save("keysToShms", &x.keysToShms) + m.Save("totalPages", &x.totalPages) + m.Save("lastIDUsed", &x.lastIDUsed) +} + +func (x *Registry) afterLoad() {} +func (x *Registry) load(m state.Map) { + m.Load("userNS", &x.userNS) + m.Load("shms", &x.shms) + m.Load("keysToShms", &x.keysToShms) + m.Load("totalPages", &x.totalPages) + m.Load("lastIDUsed", &x.lastIDUsed) +} + +func (x *Shm) beforeSave() {} +func (x *Shm) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("mfp", &x.mfp) + m.Save("registry", &x.registry) + m.Save("ID", &x.ID) + m.Save("creator", &x.creator) + m.Save("size", &x.size) + m.Save("effectiveSize", &x.effectiveSize) + m.Save("fr", &x.fr) + m.Save("key", &x.key) + m.Save("perms", &x.perms) + m.Save("owner", &x.owner) + m.Save("attachTime", &x.attachTime) + m.Save("detachTime", &x.detachTime) + m.Save("changeTime", &x.changeTime) + m.Save("creatorPID", &x.creatorPID) + m.Save("lastAttachDetachPID", &x.lastAttachDetachPID) + m.Save("pendingDestruction", &x.pendingDestruction) +} + +func (x *Shm) afterLoad() {} +func (x *Shm) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("mfp", &x.mfp) + m.Load("registry", &x.registry) + m.Load("ID", &x.ID) + m.Load("creator", &x.creator) + m.Load("size", &x.size) + m.Load("effectiveSize", &x.effectiveSize) + m.Load("fr", &x.fr) + m.Load("key", &x.key) + m.Load("perms", &x.perms) + m.Load("owner", &x.owner) + m.Load("attachTime", &x.attachTime) + m.Load("detachTime", &x.detachTime) + m.Load("changeTime", &x.changeTime) + m.Load("creatorPID", &x.creatorPID) + m.Load("lastAttachDetachPID", &x.lastAttachDetachPID) + m.Load("pendingDestruction", &x.pendingDestruction) +} + +func init() { + state.Register("shm.Registry", (*Registry)(nil), state.Fns{Save: (*Registry).save, Load: (*Registry).load}) + state.Register("shm.Shm", (*Shm)(nil), state.Fns{Save: (*Shm).save, Load: (*Shm).load}) +} diff --git a/pkg/sentry/kernel/socket_list.go b/pkg/sentry/kernel/socket_list.go new file mode 100755 index 000000000..aed0a555e --- /dev/null +++ b/pkg/sentry/kernel/socket_list.go @@ -0,0 +1,173 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type socketElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (socketElementMapper) linkerFor(elem *SocketEntry) *SocketEntry { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type socketList struct { + head *SocketEntry + tail *SocketEntry +} + +// Reset resets list l to the empty state. +func (l *socketList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *socketList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *socketList) Front() *SocketEntry { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *socketList) Back() *SocketEntry { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *socketList) PushFront(e *SocketEntry) { + socketElementMapper{}.linkerFor(e).SetNext(l.head) + socketElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + socketElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *socketList) PushBack(e *SocketEntry) { + socketElementMapper{}.linkerFor(e).SetNext(nil) + socketElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + socketElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *socketList) PushBackList(m *socketList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + socketElementMapper{}.linkerFor(l.tail).SetNext(m.head) + socketElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *socketList) InsertAfter(b, e *SocketEntry) { + a := socketElementMapper{}.linkerFor(b).Next() + socketElementMapper{}.linkerFor(e).SetNext(a) + socketElementMapper{}.linkerFor(e).SetPrev(b) + socketElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + socketElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *socketList) InsertBefore(a, e *SocketEntry) { + b := socketElementMapper{}.linkerFor(a).Prev() + socketElementMapper{}.linkerFor(e).SetNext(a) + socketElementMapper{}.linkerFor(e).SetPrev(b) + socketElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + socketElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *socketList) Remove(e *SocketEntry) { + prev := socketElementMapper{}.linkerFor(e).Prev() + next := socketElementMapper{}.linkerFor(e).Next() + + if prev != nil { + socketElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + socketElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type socketEntry struct { + next *SocketEntry + prev *SocketEntry +} + +// Next returns the entry that follows e in the list. +func (e *socketEntry) Next() *SocketEntry { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *socketEntry) Prev() *SocketEntry { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *socketEntry) SetNext(elem *SocketEntry) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *socketEntry) SetPrev(elem *SocketEntry) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/table_test.go b/pkg/sentry/kernel/table_test.go deleted file mode 100644 index 32cf47e05..000000000 --- a/pkg/sentry/kernel/table_test.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi" - "gvisor.dev/gvisor/pkg/sentry/arch" -) - -const ( - maxTestSyscall = 1000 -) - -func createSyscallTable() *SyscallTable { - m := make(map[uintptr]Syscall) - for i := uintptr(0); i <= maxTestSyscall; i++ { - j := i - m[i] = Syscall{ - Fn: func(*Task, arch.SyscallArguments) (uintptr, *SyscallControl, error) { - return j, nil, nil - }, - } - } - - s := &SyscallTable{ - OS: abi.Linux, - Arch: arch.AMD64, - Table: m, - } - - RegisterSyscallTable(s) - return s -} - -func TestTable(t *testing.T) { - table := createSyscallTable() - defer func() { - // Cleanup registered tables to keep tests separate. - allSyscallTables = []*SyscallTable{} - }() - - // Go through all functions and check that they return the right value. - for i := uintptr(0); i < maxTestSyscall; i++ { - fn := table.Lookup(i) - if fn == nil { - t.Errorf("Syscall %v is set to nil", i) - continue - } - - v, _, _ := fn(nil, arch.SyscallArguments{}) - if v != i { - t.Errorf("Wrong return value for syscall %v: expected %v, got %v", i, i, v) - } - } - - // Check that values outside the range return nil. - for i := uintptr(maxTestSyscall + 1); i < maxTestSyscall+100; i++ { - fn := table.Lookup(i) - if fn != nil { - t.Errorf("Syscall %v is not nil: %v", i, fn) - continue - } - } -} - -func BenchmarkTableLookup(b *testing.B) { - table := createSyscallTable() - - b.ResetTimer() - - j := uintptr(0) - for i := 0; i < b.N; i++ { - table.Lookup(j) - j = (j + 1) % 310 - } - - b.StopTimer() - // Cleanup registered tables to keep tests separate. - allSyscallTables = []*SyscallTable{} -} - -func BenchmarkTableMapLookup(b *testing.B) { - table := createSyscallTable() - - b.ResetTimer() - - j := uintptr(0) - for i := 0; i < b.N; i++ { - table.mapLookup(j) - j = (j + 1) % 310 - } - - b.StopTimer() - // Cleanup registered tables to keep tests separate. - allSyscallTables = []*SyscallTable{} -} diff --git a/pkg/sentry/kernel/task_list.go b/pkg/sentry/kernel/task_list.go new file mode 100755 index 000000000..57d3f098d --- /dev/null +++ b/pkg/sentry/kernel/task_list.go @@ -0,0 +1,173 @@ +package kernel + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type taskElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (taskElementMapper) linkerFor(elem *Task) *Task { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type taskList struct { + head *Task + tail *Task +} + +// Reset resets list l to the empty state. +func (l *taskList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *taskList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *taskList) Front() *Task { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *taskList) Back() *Task { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *taskList) PushFront(e *Task) { + taskElementMapper{}.linkerFor(e).SetNext(l.head) + taskElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + taskElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *taskList) PushBack(e *Task) { + taskElementMapper{}.linkerFor(e).SetNext(nil) + taskElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + taskElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *taskList) PushBackList(m *taskList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + taskElementMapper{}.linkerFor(l.tail).SetNext(m.head) + taskElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *taskList) InsertAfter(b, e *Task) { + a := taskElementMapper{}.linkerFor(b).Next() + taskElementMapper{}.linkerFor(e).SetNext(a) + taskElementMapper{}.linkerFor(e).SetPrev(b) + taskElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + taskElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *taskList) InsertBefore(a, e *Task) { + b := taskElementMapper{}.linkerFor(a).Prev() + taskElementMapper{}.linkerFor(e).SetNext(a) + taskElementMapper{}.linkerFor(e).SetPrev(b) + taskElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + taskElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *taskList) Remove(e *Task) { + prev := taskElementMapper{}.linkerFor(e).Prev() + next := taskElementMapper{}.linkerFor(e).Next() + + if prev != nil { + taskElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + taskElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type taskEntry struct { + next *Task + prev *Task +} + +// Next returns the entry that follows e in the list. +func (e *taskEntry) Next() *Task { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *taskEntry) Prev() *Task { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *taskEntry) SetNext(elem *Task) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *taskEntry) SetPrev(elem *Task) { + e.prev = elem +} diff --git a/pkg/sentry/kernel/task_test.go b/pkg/sentry/kernel/task_test.go deleted file mode 100644 index cfcde9a7a..000000000 --- a/pkg/sentry/kernel/task_test.go +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/kernel/sched" -) - -func TestTaskCPU(t *testing.T) { - for _, test := range []struct { - mask sched.CPUSet - tid ThreadID - cpu int32 - }{ - { - mask: []byte{0xff}, - tid: 1, - cpu: 0, - }, - { - mask: []byte{0xff}, - tid: 10, - cpu: 1, - }, - { - // more than 8 cpus. - mask: []byte{0xff, 0xff}, - tid: 10, - cpu: 9, - }, - { - // missing the first cpu. - mask: []byte{0xfe}, - tid: 1, - cpu: 1, - }, - { - mask: []byte{0xfe}, - tid: 10, - cpu: 3, - }, - { - // missing the fifth cpu. - mask: []byte{0xef}, - tid: 10, - cpu: 2, - }, - } { - assigned := assignCPU(test.mask, test.tid) - if test.cpu != assigned { - t.Errorf("assignCPU(%v, %v) got %v, want %v", test.mask, test.tid, assigned, test.cpu) - } - } - -} diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD deleted file mode 100644 index 9beae4b31..000000000 --- a/pkg/sentry/kernel/time/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "time", - srcs = [ - "context.go", - "time.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/time", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/syserror", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/kernel/time/time_state_autogen.go b/pkg/sentry/kernel/time/time_state_autogen.go new file mode 100755 index 000000000..77b676cec --- /dev/null +++ b/pkg/sentry/kernel/time/time_state_autogen.go @@ -0,0 +1,56 @@ +// automatically generated by stateify. + +package time + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *Time) beforeSave() {} +func (x *Time) save(m state.Map) { + x.beforeSave() + m.Save("ns", &x.ns) +} + +func (x *Time) afterLoad() {} +func (x *Time) load(m state.Map) { + m.Load("ns", &x.ns) +} + +func (x *Setting) beforeSave() {} +func (x *Setting) save(m state.Map) { + x.beforeSave() + m.Save("Enabled", &x.Enabled) + m.Save("Next", &x.Next) + m.Save("Period", &x.Period) +} + +func (x *Setting) afterLoad() {} +func (x *Setting) load(m state.Map) { + m.Load("Enabled", &x.Enabled) + m.Load("Next", &x.Next) + m.Load("Period", &x.Period) +} + +func (x *Timer) beforeSave() {} +func (x *Timer) save(m state.Map) { + x.beforeSave() + m.Save("clock", &x.clock) + m.Save("listener", &x.listener) + m.Save("setting", &x.setting) + m.Save("paused", &x.paused) +} + +func (x *Timer) afterLoad() {} +func (x *Timer) load(m state.Map) { + m.Load("clock", &x.clock) + m.Load("listener", &x.listener) + m.Load("setting", &x.setting) + m.Load("paused", &x.paused) +} + +func init() { + state.Register("time.Time", (*Time)(nil), state.Fns{Save: (*Time).save, Load: (*Time).load}) + state.Register("time.Setting", (*Setting)(nil), state.Fns{Save: (*Setting).save, Load: (*Setting).load}) + state.Register("time.Timer", (*Timer)(nil), state.Fns{Save: (*Timer).save, Load: (*Timer).load}) +} diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go deleted file mode 100644 index 849c5b646..000000000 --- a/pkg/sentry/kernel/timekeeper_test.go +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - sentrytime "gvisor.dev/gvisor/pkg/sentry/time" - "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserror" -) - -// mockClocks is a sentrytime.Clocks that simply returns the times in the -// struct. -type mockClocks struct { - monotonic int64 - realtime int64 -} - -// Update implements sentrytime.Clocks.Update. It does nothing. -func (*mockClocks) Update() (monotonicParams sentrytime.Parameters, monotonicOk bool, realtimeParam sentrytime.Parameters, realtimeOk bool) { - return -} - -// Update implements sentrytime.Clocks.GetTime. -func (c *mockClocks) GetTime(id sentrytime.ClockID) (int64, error) { - switch id { - case sentrytime.Monotonic: - return c.monotonic, nil - case sentrytime.Realtime: - return c.realtime, nil - default: - return 0, syserror.EINVAL - } -} - -// stateTestClocklessTimekeeper returns a test Timekeeper which has not had -// SetClocks called. -func stateTestClocklessTimekeeper(tb testing.TB) *Timekeeper { - ctx := contexttest.Context(tb) - mfp := pgalloc.MemoryFileProviderFromContext(ctx) - fr, err := mfp.MemoryFile().Allocate(usermem.PageSize, usage.Anonymous) - if err != nil { - tb.Fatalf("failed to allocate memory: %v", err) - } - return &Timekeeper{ - params: NewVDSOParamPage(mfp, fr), - } -} - -func stateTestTimekeeper(tb testing.TB) *Timekeeper { - t := stateTestClocklessTimekeeper(tb) - t.SetClocks(sentrytime.NewCalibratedClocks()) - return t -} - -// TestTimekeeperMonotonicZero tests that monotonic time starts at zero. -func TestTimekeeperMonotonicZero(t *testing.T) { - c := &mockClocks{ - monotonic: 100000, - } - - tk := stateTestClocklessTimekeeper(t) - tk.SetClocks(c) - defer tk.Destroy() - - now, err := tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 0 { - t.Errorf("GetTime got %d want 0", now) - } - - c.monotonic += 10 - - now, err = tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 10 { - t.Errorf("GetTime got %d want 10", now) - } -} - -// TestTimekeeperMonotonicJumpForward tests that monotonic time jumps forward -// after restore. -func TestTimekeeperMonotonicForward(t *testing.T) { - c := &mockClocks{ - monotonic: 900000, - realtime: 600000, - } - - tk := stateTestClocklessTimekeeper(t) - tk.restored = make(chan struct{}) - tk.saveMonotonic = 100000 - tk.saveRealtime = 400000 - tk.SetClocks(c) - defer tk.Destroy() - - // The monotonic clock should jump ahead by 200000 to 300000. - // - // The new system monotonic time (900000) is irrelevant to what the app - // sees. - now, err := tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 300000 { - t.Errorf("GetTime got %d want 300000", now) - } -} - -// TestTimekeeperMonotonicJumpBackwards tests that monotonic time does not jump -// backwards when realtime goes backwards. -func TestTimekeeperMonotonicJumpBackwards(t *testing.T) { - c := &mockClocks{ - monotonic: 900000, - realtime: 400000, - } - - tk := stateTestClocklessTimekeeper(t) - tk.restored = make(chan struct{}) - tk.saveMonotonic = 100000 - tk.saveRealtime = 600000 - tk.SetClocks(c) - defer tk.Destroy() - - // The monotonic clock should remain at 100000. - // - // The new system monotonic time (900000) is irrelevant to what the app - // sees and we don't want to jump the monotonic clock backwards like - // realtime did. - now, err := tk.GetTime(sentrytime.Monotonic) - if err != nil { - t.Errorf("GetTime err got %v want nil", err) - } - if now != 100000 { - t.Errorf("GetTime got %d want 100000", now) - } -} diff --git a/pkg/sentry/kernel/uncaught_signal.proto b/pkg/sentry/kernel/uncaught_signal.proto deleted file mode 100644 index 0bdb062cb..000000000 --- a/pkg/sentry/kernel/uncaught_signal.proto +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -import "pkg/sentry/arch/registers.proto"; - -message UncaughtSignal { - // Thread ID. - int32 tid = 1; - - // Process ID. - int32 pid = 2; - - // Registers at the time of the fault or signal. - Registers registers = 3; - - // Signal number. - int32 signal_number = 4; - - // The memory location which caused the fault (set if applicable, 0 - // otherwise). This will be set for SIGILL, SIGFPE, SIGSEGV, and SIGBUS. - uint64 fault_addr = 5; -} diff --git a/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go b/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go new file mode 100755 index 000000000..822e549ab --- /dev/null +++ b/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go @@ -0,0 +1,119 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: pkg/sentry/kernel/uncaught_signal.proto + +package gvisor + +import ( + fmt "fmt" + proto "github.com/golang/protobuf/proto" + registers_go_proto "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" + math "math" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package + +type UncaughtSignal struct { + Tid int32 `protobuf:"varint,1,opt,name=tid,proto3" json:"tid,omitempty"` + Pid int32 `protobuf:"varint,2,opt,name=pid,proto3" json:"pid,omitempty"` + Registers *registers_go_proto.Registers `protobuf:"bytes,3,opt,name=registers,proto3" json:"registers,omitempty"` + SignalNumber int32 `protobuf:"varint,4,opt,name=signal_number,json=signalNumber,proto3" json:"signal_number,omitempty"` + FaultAddr uint64 `protobuf:"varint,5,opt,name=fault_addr,json=faultAddr,proto3" json:"fault_addr,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *UncaughtSignal) Reset() { *m = UncaughtSignal{} } +func (m *UncaughtSignal) String() string { return proto.CompactTextString(m) } +func (*UncaughtSignal) ProtoMessage() {} +func (*UncaughtSignal) Descriptor() ([]byte, []int) { + return fileDescriptor_5ca9e03e13704688, []int{0} +} + +func (m *UncaughtSignal) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_UncaughtSignal.Unmarshal(m, b) +} +func (m *UncaughtSignal) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_UncaughtSignal.Marshal(b, m, deterministic) +} +func (m *UncaughtSignal) XXX_Merge(src proto.Message) { + xxx_messageInfo_UncaughtSignal.Merge(m, src) +} +func (m *UncaughtSignal) XXX_Size() int { + return xxx_messageInfo_UncaughtSignal.Size(m) +} +func (m *UncaughtSignal) XXX_DiscardUnknown() { + xxx_messageInfo_UncaughtSignal.DiscardUnknown(m) +} + +var xxx_messageInfo_UncaughtSignal proto.InternalMessageInfo + +func (m *UncaughtSignal) GetTid() int32 { + if m != nil { + return m.Tid + } + return 0 +} + +func (m *UncaughtSignal) GetPid() int32 { + if m != nil { + return m.Pid + } + return 0 +} + +func (m *UncaughtSignal) GetRegisters() *registers_go_proto.Registers { + if m != nil { + return m.Registers + } + return nil +} + +func (m *UncaughtSignal) GetSignalNumber() int32 { + if m != nil { + return m.SignalNumber + } + return 0 +} + +func (m *UncaughtSignal) GetFaultAddr() uint64 { + if m != nil { + return m.FaultAddr + } + return 0 +} + +func init() { + proto.RegisterType((*UncaughtSignal)(nil), "gvisor.UncaughtSignal") +} + +func init() { + proto.RegisterFile("pkg/sentry/kernel/uncaught_signal.proto", fileDescriptor_5ca9e03e13704688) +} + +var fileDescriptor_5ca9e03e13704688 = []byte{ + // 210 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x4c, 0x8e, 0x4d, 0x4a, 0xc6, 0x30, + 0x10, 0x86, 0x89, 0xfd, 0x81, 0xc6, 0x1f, 0x34, 0xab, 0x20, 0x88, 0x45, 0x17, 0x76, 0xd5, 0x80, + 0x9e, 0xc0, 0x0b, 0xb8, 0x88, 0xb8, 0x2e, 0x69, 0x13, 0xd3, 0xd0, 0x9a, 0x86, 0x49, 0x22, 0x78, + 0x24, 0x6f, 0x29, 0x4d, 0xd4, 0xef, 0xdb, 0x0d, 0xcf, 0xbc, 0xf3, 0xcc, 0x8b, 0x1f, 0xdc, 0xa2, + 0x99, 0x57, 0x36, 0xc0, 0x17, 0x5b, 0x14, 0x58, 0xb5, 0xb2, 0x68, 0x27, 0x11, 0xf5, 0x1c, 0x06, + 0x6f, 0xb4, 0x15, 0x6b, 0xef, 0x60, 0x0b, 0x1b, 0xa9, 0xf5, 0xa7, 0xf1, 0x1b, 0x5c, 0xdf, 0x1e, + 0x1d, 0x08, 0x98, 0x66, 0x06, 0x4a, 0x1b, 0x1f, 0x14, 0xf8, 0x1c, 0xbc, 0xfb, 0x46, 0xf8, 0xe2, + 0xed, 0x57, 0xf1, 0x9a, 0x0c, 0xe4, 0x12, 0x17, 0xc1, 0x48, 0x8a, 0x5a, 0xd4, 0x55, 0x7c, 0x1f, + 0x77, 0xe2, 0x8c, 0xa4, 0x27, 0x99, 0x38, 0x23, 0x09, 0xc3, 0xcd, 0xbf, 0x89, 0x16, 0x2d, 0xea, + 0x4e, 0x1f, 0xaf, 0xfa, 0xfc, 0xb3, 0xe7, 0x7f, 0x0b, 0x7e, 0xc8, 0x90, 0x7b, 0x7c, 0x9e, 0x0b, + 0x0e, 0x36, 0x7e, 0x8c, 0x0a, 0x68, 0x99, 0x64, 0x67, 0x19, 0xbe, 0x24, 0x46, 0x6e, 0x30, 0x7e, + 0x17, 0x71, 0x0d, 0x83, 0x90, 0x12, 0x68, 0xd5, 0xa2, 0xae, 0xe4, 0x4d, 0x22, 0xcf, 0x52, 0xc2, + 0x58, 0xa7, 0xca, 0x4f, 0x3f, 0x01, 0x00, 0x00, 0xff, 0xff, 0xfd, 0x62, 0x54, 0xdf, 0x06, 0x01, + 0x00, 0x00, +} diff --git a/pkg/sentry/limits/BUILD b/pkg/sentry/limits/BUILD deleted file mode 100644 index 40025d62d..000000000 --- a/pkg/sentry/limits/BUILD +++ /dev/null @@ -1,27 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "limits", - srcs = [ - "context.go", - "limits.go", - "linux.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/limits", - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - ], -) - -go_test( - name = "limits_test", - size = "small", - srcs = [ - "limits_test.go", - ], - embed = [":limits"], -) diff --git a/pkg/sentry/limits/limits_state_autogen.go b/pkg/sentry/limits/limits_state_autogen.go new file mode 100755 index 000000000..912a99cae --- /dev/null +++ b/pkg/sentry/limits/limits_state_autogen.go @@ -0,0 +1,36 @@ +// automatically generated by stateify. + +package limits + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *Limit) beforeSave() {} +func (x *Limit) save(m state.Map) { + x.beforeSave() + m.Save("Cur", &x.Cur) + m.Save("Max", &x.Max) +} + +func (x *Limit) afterLoad() {} +func (x *Limit) load(m state.Map) { + m.Load("Cur", &x.Cur) + m.Load("Max", &x.Max) +} + +func (x *LimitSet) beforeSave() {} +func (x *LimitSet) save(m state.Map) { + x.beforeSave() + m.Save("data", &x.data) +} + +func (x *LimitSet) afterLoad() {} +func (x *LimitSet) load(m state.Map) { + m.Load("data", &x.data) +} + +func init() { + state.Register("limits.Limit", (*Limit)(nil), state.Fns{Save: (*Limit).save, Load: (*Limit).load}) + state.Register("limits.LimitSet", (*LimitSet)(nil), state.Fns{Save: (*LimitSet).save, Load: (*LimitSet).load}) +} diff --git a/pkg/sentry/limits/limits_test.go b/pkg/sentry/limits/limits_test.go deleted file mode 100644 index 658a20f56..000000000 --- a/pkg/sentry/limits/limits_test.go +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package limits - -import ( - "syscall" - "testing" -) - -func TestSet(t *testing.T) { - testCases := []struct { - limit Limit - privileged bool - expectedErr error - }{ - {limit: Limit{Cur: 50, Max: 50}, privileged: false, expectedErr: nil}, - {limit: Limit{Cur: 20, Max: 50}, privileged: false, expectedErr: nil}, - {limit: Limit{Cur: 20, Max: 60}, privileged: false, expectedErr: syscall.EPERM}, - {limit: Limit{Cur: 60, Max: 50}, privileged: false, expectedErr: syscall.EINVAL}, - {limit: Limit{Cur: 11, Max: 10}, privileged: false, expectedErr: syscall.EINVAL}, - {limit: Limit{Cur: 20, Max: 60}, privileged: true, expectedErr: nil}, - } - - ls := NewLimitSet() - for _, tc := range testCases { - if _, err := ls.Set(1, tc.limit, tc.privileged); err != tc.expectedErr { - t.Fatalf("Tried to set Limit to %+v and privilege %t: got %v, wanted %v", tc.limit, tc.privileged, err, tc.expectedErr) - } - } - -} diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD deleted file mode 100644 index 3b322f5f3..000000000 --- a/pkg/sentry/loader/BUILD +++ /dev/null @@ -1,51 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_embed_data") - -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_embed_data( - name = "vdso_bin", - src = "//vdso:vdso.so", - package = "loader", - var = "vdsoBin", -) - -go_library( - name = "loader", - srcs = [ - "elf.go", - "interpreter.go", - "loader.go", - "vdso.go", - "vdso_state.go", - ":vdso_bin", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/loader", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi", - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/cpuid", - "//pkg/log", - "//pkg/rand", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/limits", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/pgalloc", - "//pkg/sentry/safemem", - "//pkg/sentry/uniqueid", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/loader/loader_state_autogen.go b/pkg/sentry/loader/loader_state_autogen.go new file mode 100755 index 000000000..4fe3c779d --- /dev/null +++ b/pkg/sentry/loader/loader_state_autogen.go @@ -0,0 +1,57 @@ +// automatically generated by stateify. + +package loader + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *VDSO) beforeSave() {} +func (x *VDSO) save(m state.Map) { + x.beforeSave() + var phdrs []elfProgHeader = x.savePhdrs() + m.SaveValue("phdrs", phdrs) + m.Save("ParamPage", &x.ParamPage) + m.Save("vdso", &x.vdso) + m.Save("os", &x.os) + m.Save("arch", &x.arch) +} + +func (x *VDSO) afterLoad() {} +func (x *VDSO) load(m state.Map) { + m.Load("ParamPage", &x.ParamPage) + m.Load("vdso", &x.vdso) + m.Load("os", &x.os) + m.Load("arch", &x.arch) + m.LoadValue("phdrs", new([]elfProgHeader), func(y interface{}) { x.loadPhdrs(y.([]elfProgHeader)) }) +} + +func (x *elfProgHeader) beforeSave() {} +func (x *elfProgHeader) save(m state.Map) { + x.beforeSave() + m.Save("Type", &x.Type) + m.Save("Flags", &x.Flags) + m.Save("Off", &x.Off) + m.Save("Vaddr", &x.Vaddr) + m.Save("Paddr", &x.Paddr) + m.Save("Filesz", &x.Filesz) + m.Save("Memsz", &x.Memsz) + m.Save("Align", &x.Align) +} + +func (x *elfProgHeader) afterLoad() {} +func (x *elfProgHeader) load(m state.Map) { + m.Load("Type", &x.Type) + m.Load("Flags", &x.Flags) + m.Load("Off", &x.Off) + m.Load("Vaddr", &x.Vaddr) + m.Load("Paddr", &x.Paddr) + m.Load("Filesz", &x.Filesz) + m.Load("Memsz", &x.Memsz) + m.Load("Align", &x.Align) +} + +func init() { + state.Register("loader.VDSO", (*VDSO)(nil), state.Fns{Save: (*VDSO).save, Load: (*VDSO).load}) + state.Register("loader.elfProgHeader", (*elfProgHeader)(nil), state.Fns{Save: (*elfProgHeader).save, Load: (*elfProgHeader).load}) +} diff --git a/pkg/sentry/loader/vdso_bin.go b/pkg/sentry/loader/vdso_bin.go new file mode 100755 index 000000000..365e8593d --- /dev/null +++ b/pkg/sentry/loader/vdso_bin.go @@ -0,0 +1,5 @@ +// Generated by go_embed_data for //pkg/sentry/loader:vdso_bin. DO NOT EDIT. + +package loader + +var vdsoBin = []byte("ELF\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00>\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00(\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x008\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00p\xff\xff\xff\xff\xff\x00\x00p\xff\xff\xff\xff\xff\xa6\x00\x00\x00\x00\x00\x00\xa6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00x\x00\x00\x00\x00\x00\x00xp\xff\xff\xff\xff\xffxp\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00\x00\x00\x00$p\xff\xff\xff\xff\xff$p\xff\xff\xff\xff\xff@\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\xe5td\x00\x00\x00d\x00\x00\x00\x00\x00\x00dp\xff\xff\xff\xff\xffdp\xff\xff\xff\xff\xff<\x00\x00\x00\x00\x00\x00\x00<\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@p\xff\xff\xff\xff\xff8\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x80p\xff\xff\xff\xff\xffl\x00\x00\x00\x00\x00\x00\x00D\x00\x00\x00\"\x00\x00@p\xff\xff\xff\xff\xff8\x00\x00\x00\x00\x00\x00\x00R\x00\x00\x00\x00\x00\xf0p\xff\xff\xff\xff\xff\"\x00\x00\x00\x00\x00\x00\x00^\x00\x00\x00\"\x00\x00\x80p\xff\xff\xff\xff\xffl\x00\x00\x00\x00\x00\x00\x00k\x00\x00\x00\x00\x00 p\xff\xff\xff\xff\xff\n\x00\x00\x00\x00\x00\x00\x00y\x00\x00\x00\"\x00\x00\xf0p\xff\xff\xff\xff\xff\"\x00\x00\x00\x00\x00\x00\x00~\x00\x00\x00\"\x00\x00 p\xff\xff\xff\xff\xff\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00linux-vdso.so.1\x00LINUX_2.6\x00__vdso_clock_gettime\x00__vdso_gettimeofday\x00clock_gettime\x00__vdso_time\x00gettimeofday\x00__vdso_getcpu\x00time\x00getcpu\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa1\xbf\xee
\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf6u\xae\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00GNU\x00gold 1.11\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00GNU\x00LV\xe1\xe2\x87\x82\n\x98`o2F\xc4\xee;8\x00\x00\x00\x00\x00\x00\xdc\x00\x00T\x00\x00\x00
\x00\x00l\x00\x00\x00\x8c
\x00\x00\x9c\x00\x00\x00\xbc
\x00\x00\xbc\x00\x00\x00\xcc
\x00\x00\xd4\x00\x00\x00\x8c\x00\x00\xf4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00zR\x00x\x90\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x008\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00,\x00\x00\x004\x00\x00\x00\xa8\x00\x00l\x00\x00\x00\x00A\x86A\x83G0X\nAAE\x00\x00\x00\x00\x00\x00\x00\x00d\x00\x00\x00\xe8\x00\x00\"\x00\x00\x00\x00A\x83G XA\x00\x00\x00\x00\x84\x00\x00\x00\xf8\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x9c\x00\x00\x00\xf0\x00\x00\xb3\x00\x00\x00\x00A\x83\nhJ\x00\x00\x00\xbc\x00\x00\x00\x90
\x00\x00\xb6\x00\x00\x00\x00A\x83\nhM\x00\x00\x00\x00\x00\x00\x00`p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Pp\xff\xff\xff\xff\xff\n\x00\x00\x00\x00\x00\x00\x00\x85\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\xff\xffo\x00\x00\x00\x00\xd6p\xff\xff\xff\xff\xff\xfc\xff\xffo\x00\x00\x00\x00\xecp\xff\xff\xff\xff\xff\xfd\xff\xffo\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x83\xfft+\x83\xfft&\x85\xfft\xb8\xe4\x00\x00\x00\xc3f.\x84\x00\x00\x00\x00\x00H\x89\xf7\xe9\xc8\x00\x00\x00\x84\x00\x00\x00\x00\x00H\x89\xf7\xe9x\x00\x00\x84\x00\x00\x00\x00\x00USH\x89\xf3H\x83\xecH\x85\xfft;H\x89\xfdH\x89\xe7\xe8\x97\x00\x00\x00\x85\xc0u@H\x8b$H\x8bL$H\xba\xcf\xf7S㥛\xc4 H\x89E\x00H\x89\xc8H\xc1\xf9?H\xf7\xeaH\xc1\xfaH)\xcaH\x89UH\x85\xdbt\xc7\x00\x00\x00\x00\xc7C\x00\x00\x00\x001\xc0H\x83\xc4[]\xc3@\x001\xc0\xeb\xf1@\x00SH\x89\xfbH\x83\xecH\x89\xe7\xe80\x00\x00\x00H\x85\xdbH\x8b$tH\x89H\x83\xc4[\xc3@\x00f.\x84\x00\x00\x00\x00\x00\xb85\x00\x00H\x98Ð\x90\x90\x90\x90\x90SH\x89\xfeH\x8d
\xc5\xde\xff\xffH\x8b9\x83\xe7\xfeH\x8bY(L\x8bA8Lc\xcfL\x8bY0L\x8bQ@\xae\xe81H\x8b9L9\xcfu\xddH\x85\xdbtrH\x89щ\xc0H\xc1\xe1 H \xc11\xc0I9\xcb(H\xb8\x00\x00\x00\x00\x00ʚ;1\xd2L)\xd9I\xf7\xf2H\x89\xcfH\xc1\xff?H\xaf\xf8H\xf7\xe1H\xfaH\xac\xd0 I\x8d<\x00H\xb9SZ\x9b\xa0/\xb8D\x00[H\x89\xf8H\xc1\xe8 H\xf7\xe1H\x89\xd0H\xc1\xe8H\x89Hi\xc0\x00ʚ;H)\xc71\xc0H\x89~\xc3\x00\xb8\xe4\x00\x00\x001\xff[\xc3\x00f.\x84\x00\x00\x00\x00\x00SH\x89\xfeH\x8d
\xde\xff\xffH\x8b9\x83\xe7\xfeH\x8bYL\x8bALc\xcfL\x8bYL\x8bQ \xae\xe81H\x8b9L9\xcfu\xddH\x85\xdbtrH\x89щ\xc0H\xc1\xe1 H \xc11\xc0I9\xcb(H\xb8\x00\x00\x00\x00\x00ʚ;1\xd2L)\xd9I\xf7\xf2H\x89\xcfH\xc1\xff?H\xaf\xf8H\xf7\xe1H\xfaH\xac\xd0 I\x8d<\x00H\xb9SZ\x9b\xa0/\xb8D\x00[H\x89\xf8H\xc1\xe8 H\xf7\xe1H\x89\xd0H\xc1\xe8H\x89Hi\xc0\x00ʚ;H)\xc71\xc0H\x89~\xc3\x00\xb8\xe4\x00\x00\x00\xbf\x00\x00\x00[\xc3\x00GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.11) 5.4.0 20160609\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000p\xff\xff\xff\xff\xff\xb3\x00\x00\x00\x00\x00\x00\x009\x00\x00\x00\x00\x00\xf0p\xff\xff\xff\xff\xff\xb6\x00\x00\x00\x00\x00\x00\x00]\x00\x00\x00 \x00xp\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00f\x00\x00\x00\x00\x00\xf1\xff\x00\x00p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00s\x00\x00\x00\x00\x00\xf1\xff\x00\xf0o\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00{\x00\x00\x00\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x85\x00\x00\x00\x00\x00@p\xff\xff\xff\xff\xff8\x00\x00\x00\x00\x00\x00\x00\x9a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\x00\x00\x00\x00\x00\x80p\xff\xff\xff\xff\xffl\x00\x00\x00\x00\x00\x00\x00\xc4\x00\x00\x00\"\x00\x00@p\xff\xff\xff\xff\xff8\x00\x00\x00\x00\x00\x00\x00\xd2\x00\x00\x00\x00\x00\xf0p\xff\xff\xff\xff\xff\"\x00\x00\x00\x00\x00\x00\x00\xde\x00\x00\x00\"\x00\x00\x80p\xff\xff\xff\xff\xffl\x00\x00\x00\x00\x00\x00\x00\xeb\x00\x00\x00\x00\x00 p\xff\xff\xff\xff\xff\n\x00\x00\x00\x00\x00\x00\x00\xf9\x00\x00\x00\"\x00\x00\xf0p\xff\xff\xff\xff\xff\"\x00\x00\x00\x00\x00\x00\x00\xfe\x00\x00\x00\"\x00\x00 p\xff\xff\xff\xff\xff\n\x00\x00\x00\x00\x00\x00\x00\x00vdso.cc\x00vdso_time.cc\x00_ZN4vdso13ClockRealtimeEP8timespec\x00_ZN4vdso14ClockMonotonicEP8timespec\x00_DYNAMIC\x00VDSO_PRELINK\x00_params\x00LINUX_2.6\x00__vdso_clock_gettime\x00_GLOBAL_OFFSET_TABLE_\x00__vdso_gettimeofday\x00clock_gettime\x00__vdso_time\x00gettimeofday\x00__vdso_getcpu\x00time\x00getcpu\x00\x00.text\x00.comment\x00.bss\x00.dynstr\x00.eh_frame_hdr\x00.gnu.version\x00.dynsym\x00.hash\x00.note\x00.eh_frame\x00.gnu.version_d\x00.dynamic\x00.shstrtab\x00.strtab\x00.symtab\x00.data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 p\xff\xff\xff\xff\xff \x00\x00\x00\x00\x00\x00<\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x008\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00`p\xff\xff\xff\xff\xff`\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Pp\xff\xff\xff\xff\xffP\x00\x00\x00\x00\x00\x00\x85\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00+\x00\x00\x00\xff\xff\xffo\x00\x00\x00\x00\x00\x00\x00\xd6p\xff\xff\xff\xff\xff\xd6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00V\x00\x00\x00\xfd\xff\xffo\x00\x00\x00\x00\x00\x00\x00\xecp\xff\xff\xff\xff\xff\xec\x00\x00\x00\x00\x00\x008\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00F\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$p\xff\xff\xff\xff\xff$\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00dp\xff\xff\xff\xff\xffd\x00\x00\x00\x00\x00\x00<\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00L\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa0p\xff\xff\xff\xff\xff\xa0\x00\x00\x00\x00\x00\x00\xd8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00xp\xff\xff\xff\xff\xffx\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x88\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00p\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@p\xff\xff\xff\xff\xff@\x00\x00\x00\x00\x00\x00f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa6\x00\x00\x00\x00\x00\x006\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0\x00\x00\x00\x00\x00\x00\xb0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00x\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x95\x00\x00\x00\x00\x00\x00\x8e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00") diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD deleted file mode 100644 index 29c14ec56..000000000 --- a/pkg/sentry/memmap/BUILD +++ /dev/null @@ -1,57 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "mappable_range", - out = "mappable_range.go", - package = "memmap", - prefix = "Mappable", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - -go_template_instance( - name = "mapping_set_impl", - out = "mapping_set_impl.go", - package = "memmap", - prefix = "Mapping", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "MappableRange", - "Value": "MappingsOfRange", - "Functions": "mappingSetFunctions", - }, -) - -go_library( - name = "memmap", - srcs = [ - "mappable_range.go", - "mapping_set.go", - "mapping_set_impl.go", - "memmap.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/memmap", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/log", - "//pkg/refs", - "//pkg/sentry/context", - "//pkg/sentry/platform", - "//pkg/sentry/usermem", - "//pkg/syserror", - ], -) - -go_test( - name = "memmap_test", - size = "small", - srcs = ["mapping_set_test.go"], - embed = [":memmap"], - deps = ["//pkg/sentry/usermem"], -) diff --git a/pkg/sentry/memmap/mappable_range.go b/pkg/sentry/memmap/mappable_range.go new file mode 100755 index 000000000..6b6c2c685 --- /dev/null +++ b/pkg/sentry/memmap/mappable_range.go @@ -0,0 +1,62 @@ +package memmap + +// A Range represents a contiguous range of T. +// +// +stateify savable +type MappableRange struct { + // Start is the inclusive start of the range. + Start uint64 + + // End is the exclusive end of the range. + End uint64 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +func (r MappableRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +func (r MappableRange) Length() uint64 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +func (r MappableRange) Contains(x uint64) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +func (r MappableRange) Overlaps(r2 MappableRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +func (r MappableRange) IsSupersetOf(r2 MappableRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +func (r MappableRange) Intersect(r2 MappableRange) MappableRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +func (r MappableRange) CanSplitAt(x uint64) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/memmap/mapping_set_impl.go b/pkg/sentry/memmap/mapping_set_impl.go new file mode 100755 index 000000000..e632f28a5 --- /dev/null +++ b/pkg/sentry/memmap/mapping_set_impl.go @@ -0,0 +1,1270 @@ +package memmap + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + MappingminDegree = 3 + + MappingmaxDegree = 2 * MappingminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type MappingSet struct { + root Mappingnode `state:".(*MappingSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *MappingSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *MappingSet) IsEmptyRange(r MappableRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *MappingSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *MappingSet) SpanRange(r MappableRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *MappingSet) FirstSegment() MappingIterator { + if s.root.nrSegments == 0 { + return MappingIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *MappingSet) LastSegment() MappingIterator { + if s.root.nrSegments == 0 { + return MappingIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *MappingSet) FirstGap() MappingGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return MappingGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *MappingSet) LastGap() MappingGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return MappingGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *MappingSet) Find(key uint64) (MappingIterator, MappingGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return MappingIterator{n, i}, MappingGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return MappingIterator{}, MappingGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *MappingSet) FindSegment(key uint64) MappingIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *MappingSet) LowerBoundSegment(min uint64) MappingIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *MappingSet) UpperBoundSegment(max uint64) MappingIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *MappingSet) FindGap(key uint64) MappingGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *MappingSet) LowerBoundGap(min uint64) MappingGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *MappingSet) UpperBoundGap(max uint64) MappingGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *MappingSet) Add(r MappableRange, val MappingsOfRange) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *MappingSet) AddWithoutMerging(r MappableRange, val MappingsOfRange) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *MappingSet) Insert(gap MappingGapIterator, r MappableRange, val MappingsOfRange) MappingIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (mappingSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (mappingSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (mappingSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *MappingSet) InsertWithoutMerging(gap MappingGapIterator, r MappableRange, val MappingsOfRange) MappingIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *MappingSet) InsertWithoutMergingUnchecked(gap MappingGapIterator, r MappableRange, val MappingsOfRange) MappingIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return MappingIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *MappingSet) Remove(seg MappingIterator) MappingGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + mappingSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(MappingGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *MappingSet) RemoveAll() { + s.root = Mappingnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *MappingSet) RemoveRange(r MappableRange) MappingGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *MappingSet) Merge(first, second MappingIterator) MappingIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *MappingSet) MergeUnchecked(first, second MappingIterator) MappingIterator { + if first.End() == second.Start() { + if mval, ok := (mappingSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return MappingIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *MappingSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *MappingSet) MergeRange(r MappableRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *MappingSet) MergeAdjacent(r MappableRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *MappingSet) Split(seg MappingIterator, split uint64) (MappingIterator, MappingIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *MappingSet) SplitUnchecked(seg MappingIterator, split uint64) (MappingIterator, MappingIterator) { + val1, val2 := (mappingSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), MappableRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *MappingSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *MappingSet) Isolate(seg MappingIterator, r MappableRange) MappingIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *MappingSet) ApplyContiguous(r MappableRange, fn func(seg MappingIterator)) MappingGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return MappingGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return MappingGapIterator{} + } + } +} + +// +stateify savable +type Mappingnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *Mappingnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [MappingmaxDegree - 1]MappableRange + values [MappingmaxDegree - 1]MappingsOfRange + children [MappingmaxDegree]*Mappingnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Mappingnode) firstSegment() MappingIterator { + for n.hasChildren { + n = n.children[0] + } + return MappingIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Mappingnode) lastSegment() MappingIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return MappingIterator{n, n.nrSegments - 1} +} + +func (n *Mappingnode) prevSibling() *Mappingnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *Mappingnode) nextSibling() *Mappingnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *Mappingnode) rebalanceBeforeInsert(gap MappingGapIterator) MappingGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < MappingmaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &Mappingnode{ + nrSegments: MappingminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &Mappingnode{ + nrSegments: MappingminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:MappingminDegree-1], n.keys[:MappingminDegree-1]) + copy(left.values[:MappingminDegree-1], n.values[:MappingminDegree-1]) + copy(right.keys[:MappingminDegree-1], n.keys[MappingminDegree:]) + copy(right.values[:MappingminDegree-1], n.values[MappingminDegree:]) + n.keys[0], n.values[0] = n.keys[MappingminDegree-1], n.values[MappingminDegree-1] + MappingzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:MappingminDegree], n.children[:MappingminDegree]) + copy(right.children[:MappingminDegree], n.children[MappingminDegree:]) + MappingzeroNodeSlice(n.children[2:]) + for i := 0; i < MappingminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < MappingminDegree { + return MappingGapIterator{left, gap.index} + } + return MappingGapIterator{right, gap.index - MappingminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[MappingminDegree-1], n.values[MappingminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &Mappingnode{ + nrSegments: MappingminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:MappingminDegree-1], n.keys[MappingminDegree:]) + copy(sibling.values[:MappingminDegree-1], n.values[MappingminDegree:]) + MappingzeroValueSlice(n.values[MappingminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:MappingminDegree], n.children[MappingminDegree:]) + MappingzeroNodeSlice(n.children[MappingminDegree:]) + for i := 0; i < MappingminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = MappingminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < MappingminDegree { + return gap + } + return MappingGapIterator{sibling, gap.index - MappingminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *Mappingnode) rebalanceAfterRemove(gap MappingGapIterator) MappingGapIterator { + for { + if n.nrSegments >= MappingminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= MappingminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + mappingSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return MappingGapIterator{n, 0} + } + if gap.node == n { + return MappingGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= MappingminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + mappingSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return MappingGapIterator{n, n.nrSegments} + } + return MappingGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return MappingGapIterator{p, gap.index} + } + if gap.node == right { + return MappingGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *Mappingnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = MappingGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + mappingSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type MappingIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *Mappingnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg MappingIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg MappingIterator) Range() MappableRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg MappingIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg MappingIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg MappingIterator) SetRangeUnchecked(r MappableRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg MappingIterator) SetRange(r MappableRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg MappingIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg MappingIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg MappingIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg MappingIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg MappingIterator) Value() MappingsOfRange { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg MappingIterator) ValuePtr() *MappingsOfRange { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg MappingIterator) SetValue(val MappingsOfRange) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg MappingIterator) PrevSegment() MappingIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return MappingIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return MappingIterator{} + } + return MappingsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg MappingIterator) NextSegment() MappingIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return MappingIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return MappingIterator{} + } + return MappingsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg MappingIterator) PrevGap() MappingGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return MappingGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg MappingIterator) NextGap() MappingGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return MappingGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg MappingIterator) PrevNonEmpty() (MappingIterator, MappingGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return MappingIterator{}, gap + } + return gap.PrevSegment(), MappingGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg MappingIterator) NextNonEmpty() (MappingIterator, MappingGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return MappingIterator{}, gap + } + return gap.NextSegment(), MappingGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type MappingGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *Mappingnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap MappingGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap MappingGapIterator) Range() MappableRange { + return MappableRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap MappingGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return mappingSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap MappingGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return mappingSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap MappingGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap MappingGapIterator) PrevSegment() MappingIterator { + return MappingsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap MappingGapIterator) NextSegment() MappingIterator { + return MappingsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap MappingGapIterator) PrevGap() MappingGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return MappingGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap MappingGapIterator) NextGap() MappingGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return MappingGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func MappingsegmentBeforePosition(n *Mappingnode, i int) MappingIterator { + for i == 0 { + if n.parent == nil { + return MappingIterator{} + } + n, i = n.parent, n.parentIndex + } + return MappingIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func MappingsegmentAfterPosition(n *Mappingnode, i int) MappingIterator { + for i == n.nrSegments { + if n.parent == nil { + return MappingIterator{} + } + n, i = n.parent, n.parentIndex + } + return MappingIterator{n, i} +} + +func MappingzeroValueSlice(slice []MappingsOfRange) { + + for i := range slice { + mappingSetFunctions{}.ClearValue(&slice[i]) + } +} + +func MappingzeroNodeSlice(slice []*Mappingnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *MappingSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *Mappingnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *Mappingnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type MappingSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []MappingsOfRange +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *MappingSet) ExportSortedSlices() *MappingSegmentDataSlices { + var sds MappingSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *MappingSet) ImportSortedSlices(sds *MappingSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := MappableRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *MappingSet) saveRoot() *MappingSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *MappingSet) loadRoot(sds *MappingSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/memmap/mapping_set_test.go b/pkg/sentry/memmap/mapping_set_test.go deleted file mode 100644 index f9b11a59c..000000000 --- a/pkg/sentry/memmap/mapping_set_test.go +++ /dev/null @@ -1,260 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memmap - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -type testMappingSpace struct { - // Ideally we'd store the full ranges that were invalidated, rather - // than individual calls to Invalidate, as they are an implementation - // detail, but this is the simplest way for now. - inv []usermem.AddrRange -} - -func (n *testMappingSpace) reset() { - n.inv = []usermem.AddrRange{} -} - -func (n *testMappingSpace) Invalidate(ar usermem.AddrRange, opts InvalidateOpts) { - n.inv = append(n.inv, ar) -} - -func TestAddRemoveMapping(t *testing.T) { - set := MappingSet{} - ms := &testMappingSpace{} - - mapped := set.AddMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, true) - if got, want := mapped, []MappableRange{{0x1000, 0x3000}}; !reflect.DeepEqual(got, want) { - t.Errorf("AddMapping: got %+v, wanted %+v", got, want) - } - - // Mappings (usermem.AddrRanges => memmap.MappableRange): - // [0x10000, 0x12000) => [0x1000, 0x3000) - t.Log(&set) - - mapped = set.AddMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true) - if len(mapped) != 0 { - t.Errorf("AddMapping: got %+v, wanted []", mapped) - } - - // Mappings: - // [0x10000, 0x11000) => [0x1000, 0x2000) - // [0x11000, 0x12000) and [0x20000, 0x21000) => [0x2000, 0x3000) - t.Log(&set) - - mapped = set.AddMapping(ms, usermem.AddrRange{0x30000, 0x31000}, 0x4000, true) - if got, want := mapped, []MappableRange{{0x4000, 0x5000}}; !reflect.DeepEqual(got, want) { - t.Errorf("AddMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x10000, 0x11000) => [0x1000, 0x2000) - // [0x11000, 0x12000) and [0x20000, 0x21000) => [0x2000, 0x3000) - // [0x30000, 0x31000) => [0x4000, 0x5000) - t.Log(&set) - - mapped = set.AddMapping(ms, usermem.AddrRange{0x12000, 0x15000}, 0x3000, true) - if got, want := mapped, []MappableRange{{0x3000, 0x4000}, {0x5000, 0x6000}}; !reflect.DeepEqual(got, want) { - t.Errorf("AddMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x10000, 0x11000) => [0x1000, 0x2000) - // [0x11000, 0x12000) and [0x20000, 0x21000) => [0x2000, 0x3000) - // [0x12000, 0x13000) => [0x3000, 0x4000) - // [0x13000, 0x14000) and [0x30000, 0x31000) => [0x4000, 0x5000) - // [0x14000, 0x15000) => [0x5000, 0x6000) - t.Log(&set) - - unmapped := set.RemoveMapping(ms, usermem.AddrRange{0x10000, 0x11000}, 0x1000, true) - if got, want := unmapped, []MappableRange{{0x1000, 0x2000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x11000, 0x12000) and [0x20000, 0x21000) => [0x2000, 0x3000) - // [0x12000, 0x13000) => [0x3000, 0x4000) - // [0x13000, 0x14000) and [0x30000, 0x31000) => [0x4000, 0x5000) - // [0x14000, 0x15000) => [0x5000, 0x6000) - t.Log(&set) - - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true) - if len(unmapped) != 0 { - t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) - } - - // Mappings: - // [0x11000, 0x13000) => [0x2000, 0x4000) - // [0x13000, 0x14000) and [0x30000, 0x31000) => [0x4000, 0x5000) - // [0x14000, 0x15000) => [0x5000, 0x6000) - t.Log(&set) - - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x11000, 0x15000}, 0x2000, true) - if got, want := unmapped, []MappableRange{{0x2000, 0x4000}, {0x5000, 0x6000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x30000, 0x31000) => [0x4000, 0x5000) - t.Log(&set) - - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x30000, 0x31000}, 0x4000, true) - if got, want := unmapped, []MappableRange{{0x4000, 0x5000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } -} - -func TestInvalidateWholeMapping(t *testing.T) { - set := MappingSet{} - ms := &testMappingSpace{} - - set.AddMapping(ms, usermem.AddrRange{0x10000, 0x11000}, 0, true) - // Mappings: - // [0x10000, 0x11000) => [0, 0x1000) - t.Log(&set) - set.Invalidate(MappableRange{0, 0x1000}, InvalidateOpts{}) - if got, want := ms.inv, []usermem.AddrRange{{0x10000, 0x11000}}; !reflect.DeepEqual(got, want) { - t.Errorf("Invalidate: got %+v, wanted %+v", got, want) - } -} - -func TestInvalidatePartialMapping(t *testing.T) { - set := MappingSet{} - ms := &testMappingSpace{} - - set.AddMapping(ms, usermem.AddrRange{0x10000, 0x13000}, 0, true) - // Mappings: - // [0x10000, 0x13000) => [0, 0x3000) - t.Log(&set) - set.Invalidate(MappableRange{0x1000, 0x2000}, InvalidateOpts{}) - if got, want := ms.inv, []usermem.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) { - t.Errorf("Invalidate: got %+v, wanted %+v", got, want) - } -} - -func TestInvalidateMultipleMappings(t *testing.T) { - set := MappingSet{} - ms := &testMappingSpace{} - - set.AddMapping(ms, usermem.AddrRange{0x10000, 0x11000}, 0, true) - set.AddMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true) - // Mappings: - // [0x10000, 0x11000) => [0, 0x1000) - // [0x12000, 0x13000) => [0x2000, 0x3000) - t.Log(&set) - set.Invalidate(MappableRange{0, 0x3000}, InvalidateOpts{}) - if got, want := ms.inv, []usermem.AddrRange{{0x10000, 0x11000}, {0x20000, 0x21000}}; !reflect.DeepEqual(got, want) { - t.Errorf("Invalidate: got %+v, wanted %+v", got, want) - } -} - -func TestInvalidateOverlappingMappings(t *testing.T) { - set := MappingSet{} - ms1 := &testMappingSpace{} - ms2 := &testMappingSpace{} - - set.AddMapping(ms1, usermem.AddrRange{0x10000, 0x12000}, 0, true) - set.AddMapping(ms2, usermem.AddrRange{0x20000, 0x22000}, 0x1000, true) - // Mappings: - // ms1:[0x10000, 0x12000) => [0, 0x2000) - // ms2:[0x11000, 0x13000) => [0x1000, 0x3000) - t.Log(&set) - set.Invalidate(MappableRange{0x1000, 0x2000}, InvalidateOpts{}) - if got, want := ms1.inv, []usermem.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) { - t.Errorf("Invalidate: ms1: got %+v, wanted %+v", got, want) - } - if got, want := ms2.inv, []usermem.AddrRange{{0x20000, 0x21000}}; !reflect.DeepEqual(got, want) { - t.Errorf("Invalidate: ms1: got %+v, wanted %+v", got, want) - } -} - -func TestMixedWritableMappings(t *testing.T) { - set := MappingSet{} - ms := &testMappingSpace{} - - mapped := set.AddMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, true) - if got, want := mapped, []MappableRange{{0x1000, 0x3000}}; !reflect.DeepEqual(got, want) { - t.Errorf("AddMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x10000, 0x12000) writable => [0x1000, 0x3000) - t.Log(&set) - - mapped = set.AddMapping(ms, usermem.AddrRange{0x20000, 0x22000}, 0x2000, false) - if got, want := mapped, []MappableRange{{0x3000, 0x4000}}; !reflect.DeepEqual(got, want) { - t.Errorf("AddMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x10000, 0x11000) writable => [0x1000, 0x2000) - // [0x11000, 0x12000) writable and [0x20000, 0x21000) readonly => [0x2000, 0x3000) - // [0x21000, 0x22000) readonly => [0x3000, 0x4000) - t.Log(&set) - - // Unmap should fail because we specified the readonly map address range, but - // asked to unmap a writable segment. - unmapped := set.RemoveMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true) - if len(unmapped) != 0 { - t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) - } - - // Readonly mapping removed, but writable mapping still exists in the range, - // so no mappable range fully unmapped. - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, false) - if len(unmapped) != 0 { - t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) - } - - // Mappings: - // [0x10000, 0x12000) writable => [0x1000, 0x3000) - // [0x21000, 0x22000) readonly => [0x3000, 0x4000) - t.Log(&set) - - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x11000, 0x12000}, 0x2000, true) - if got, want := unmapped, []MappableRange{{0x2000, 0x3000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x10000, 0x12000) writable => [0x1000, 0x3000) - // [0x21000, 0x22000) readonly => [0x3000, 0x4000) - t.Log(&set) - - // Unmap should fail since writable bit doesn't match. - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, false) - if len(unmapped) != 0 { - t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) - } - - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, true) - if got, want := unmapped, []MappableRange{{0x1000, 0x2000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } - - // Mappings: - // [0x21000, 0x22000) readonly => [0x3000, 0x4000) - t.Log(&set) - - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x21000, 0x22000}, 0x3000, false) - if got, want := unmapped, []MappableRange{{0x3000, 0x4000}}; !reflect.DeepEqual(got, want) { - t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) - } -} diff --git a/pkg/sentry/memmap/memmap_state_autogen.go b/pkg/sentry/memmap/memmap_state_autogen.go new file mode 100755 index 000000000..2d0814ab1 --- /dev/null +++ b/pkg/sentry/memmap/memmap_state_autogen.go @@ -0,0 +1,93 @@ +// automatically generated by stateify. + +package memmap + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *MappableRange) beforeSave() {} +func (x *MappableRange) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) +} + +func (x *MappableRange) afterLoad() {} +func (x *MappableRange) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) +} + +func (x *MappingOfRange) beforeSave() {} +func (x *MappingOfRange) save(m state.Map) { + x.beforeSave() + m.Save("MappingSpace", &x.MappingSpace) + m.Save("AddrRange", &x.AddrRange) + m.Save("Writable", &x.Writable) +} + +func (x *MappingOfRange) afterLoad() {} +func (x *MappingOfRange) load(m state.Map) { + m.Load("MappingSpace", &x.MappingSpace) + m.Load("AddrRange", &x.AddrRange) + m.Load("Writable", &x.Writable) +} + +func (x *MappingSet) beforeSave() {} +func (x *MappingSet) save(m state.Map) { + x.beforeSave() + var root *MappingSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *MappingSet) afterLoad() {} +func (x *MappingSet) load(m state.Map) { + m.LoadValue("root", new(*MappingSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*MappingSegmentDataSlices)) }) +} + +func (x *Mappingnode) beforeSave() {} +func (x *Mappingnode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *Mappingnode) afterLoad() {} +func (x *Mappingnode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *MappingSegmentDataSlices) beforeSave() {} +func (x *MappingSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *MappingSegmentDataSlices) afterLoad() {} +func (x *MappingSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func init() { + state.Register("memmap.MappableRange", (*MappableRange)(nil), state.Fns{Save: (*MappableRange).save, Load: (*MappableRange).load}) + state.Register("memmap.MappingOfRange", (*MappingOfRange)(nil), state.Fns{Save: (*MappingOfRange).save, Load: (*MappingOfRange).load}) + state.Register("memmap.MappingSet", (*MappingSet)(nil), state.Fns{Save: (*MappingSet).save, Load: (*MappingSet).load}) + state.Register("memmap.Mappingnode", (*Mappingnode)(nil), state.Fns{Save: (*Mappingnode).save, Load: (*Mappingnode).load}) + state.Register("memmap.MappingSegmentDataSlices", (*MappingSegmentDataSlices)(nil), state.Fns{Save: (*MappingSegmentDataSlices).save, Load: (*MappingSegmentDataSlices).load}) +} diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD deleted file mode 100644 index 072745a08..000000000 --- a/pkg/sentry/mm/BUILD +++ /dev/null @@ -1,142 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "file_refcount_set", - out = "file_refcount_set.go", - imports = { - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", - }, - package = "mm", - prefix = "fileRefcount", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "platform.FileRange", - "Value": "int32", - "Functions": "fileRefcountSetFunctions", - }, -) - -go_template_instance( - name = "vma_set", - out = "vma_set.go", - consts = { - "minDegree": "8", - }, - imports = { - "usermem": "gvisor.dev/gvisor/pkg/sentry/usermem", - }, - package = "mm", - prefix = "vma", - template = "//pkg/segment:generic_set", - types = { - "Key": "usermem.Addr", - "Range": "usermem.AddrRange", - "Value": "vma", - "Functions": "vmaSetFunctions", - }, -) - -go_template_instance( - name = "pma_set", - out = "pma_set.go", - consts = { - "minDegree": "8", - }, - imports = { - "usermem": "gvisor.dev/gvisor/pkg/sentry/usermem", - }, - package = "mm", - prefix = "pma", - template = "//pkg/segment:generic_set", - types = { - "Key": "usermem.Addr", - "Range": "usermem.AddrRange", - "Value": "pma", - "Functions": "pmaSetFunctions", - }, -) - -go_template_instance( - name = "io_list", - out = "io_list.go", - package = "mm", - prefix = "io", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*ioResult", - "Linker": "*ioResult", - }, -) - -go_library( - name = "mm", - srcs = [ - "address_space.go", - "aio_context.go", - "aio_context_state.go", - "debug.go", - "file_refcount_set.go", - "io.go", - "io_list.go", - "lifecycle.go", - "metadata.go", - "mm.go", - "pma.go", - "pma_set.go", - "procfs.go", - "save_restore.go", - "shm.go", - "special_mappable.go", - "syscalls.go", - "vma.go", - "vma_set.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/mm", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/atomicbitops", - "//pkg/log", - "//pkg/refs", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/proc/seqfile", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/futex", - "//pkg/sentry/kernel/shm", - "//pkg/sentry/limits", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/safemem", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/tcpip/buffer", - "//third_party/gvsync", - ], -) - -go_test( - name = "mm_test", - size = "small", - srcs = ["mm_test.go"], - embed = [":mm"], - deps = [ - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/limits", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/usermem", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/mm/README.md b/pkg/sentry/mm/README.md deleted file mode 100644 index e1322e373..000000000 --- a/pkg/sentry/mm/README.md +++ /dev/null @@ -1,280 +0,0 @@ -This package provides an emulation of Linux semantics for application virtual -memory mappings. - -For completeness, this document also describes aspects of the memory management -subsystem defined outside this package. - -# Background - -We begin by describing semantics for virtual memory in Linux. - -A virtual address space is defined as a collection of mappings from virtual -addresses to physical memory. However, userspace applications do not configure -mappings to physical memory directly. Instead, applications configure memory -mappings from virtual addresses to offsets into a file using the `mmap` system -call.[^mmap-anon] For example, a call to: - - mmap( - /* addr = */ 0x400000, - /* length = */ 0x1000, - PROT_READ | PROT_WRITE, - MAP_SHARED, - /* fd = */ 3, - /* offset = */ 0); - -creates a mapping of length 0x1000 bytes, starting at virtual address (VA) -0x400000, to offset 0 in the file represented by file descriptor (FD) 3. Within -the Linux kernel, virtual memory mappings are represented by *virtual memory -areas* (VMAs). Supposing that FD 3 represents file /tmp/foo, the state of the -virtual memory subsystem after the `mmap` call may be depicted as: - - VMA: VA:0x400000 -> /tmp/foo:0x0 - -Establishing a virtual memory area does not necessarily establish a mapping to a -physical address, because Linux has not necessarily provisioned physical memory -to store the file's contents. Thus, if the application attempts to read the -contents of VA 0x400000, it may incur a *page fault*, a CPU exception that -forces the kernel to create such a mapping to service the read. - -For a file, doing so consists of several logical phases: - -1. The kernel allocates physical memory to store the contents of the required - part of the file, and copies file contents to the allocated memory. - Supposing that the kernel chooses the physical memory at physical address - (PA) 0x2fb000, the resulting state of the system is: - - VMA: VA:0x400000 -> /tmp/foo:0x0 - Filemap: /tmp/foo:0x0 -> PA:0x2fb000 - - (In Linux the state of the mapping from file offset to physical memory is - stored in `struct address_space`, but to avoid confusion with other notions - of address space we will refer to this system as filemap, named after Linux - kernel source file `mm/filemap.c`.) - -2. The kernel stores the effective mapping from virtual to physical address in - a *page table entry* (PTE) in the application's *page tables*, which are - used by the CPU's virtual memory hardware to perform address translation. - The resulting state of the system is: - - VMA: VA:0x400000 -> /tmp/foo:0x0 - Filemap: /tmp/foo:0x0 -> PA:0x2fb000 - PTE: VA:0x400000 -----------------> PA:0x2fb000 - - The PTE is required for the application to actually use the contents of the - mapped file as virtual memory. However, the PTE is derived from the VMA and - filemap state, both of which are independently mutable, such that mutations - to either will affect the PTE. For example: - - - The application may remove the VMA using the `munmap` system call. This - breaks the mapping from VA:0x400000 to /tmp/foo:0x0, and consequently - the mapping from VA:0x400000 to PA:0x2fb000. However, it does not - necessarily break the mapping from /tmp/foo:0x0 to PA:0x2fb000, so a - future mapping of the same file offset may reuse this physical memory. - - - The application may invalidate the file's contents by passing a length - of 0 to the `ftruncate` system call. This breaks the mapping from - /tmp/foo:0x0 to PA:0x2fb000, and consequently the mapping from - VA:0x400000 to PA:0x2fb000. However, it does not break the mapping from - VA:0x400000 to /tmp/foo:0x0, so future changes to the file's contents - may again be made visible at VA:0x400000 after another page fault - results in the allocation of a new physical address. - - Note that, in order to correctly break the mapping from VA:0x400000 to - PA:0x2fb000 in the latter case, filemap must also store a *reverse mapping* - from /tmp/foo:0x0 to VA:0x400000 so that it can locate and remove the PTE. - -[^mmap-anon]: Memory mappings to non-files are discussed in later sections. - -## Private Mappings - -The preceding example considered VMAs created using the `MAP_SHARED` flag, which -means that PTEs derived from the mapping should always use physical memory that -represents the current state of the mapped file.[^mmap-dev-zero] Applications -can alternatively pass the `MAP_PRIVATE` flag to create a *private mapping*. -Private mappings are *copy-on-write*. - -Suppose that the application instead created a private mapping in the previous -example. In Linux, the state of the system after a read page fault would be: - - VMA: VA:0x400000 -> /tmp/foo:0x0 (private) - Filemap: /tmp/foo:0x0 -> PA:0x2fb000 - PTE: VA:0x400000 -----------------> PA:0x2fb000 (read-only) - -Now suppose the application attempts to write to VA:0x400000. For a shared -mapping, the write would be propagated to PA:0x2fb000, and the kernel would be -responsible for ensuring that the write is later propagated to the mapped file. -For a private mapping, the write incurs another page fault since the PTE is -marked read-only. In response, the kernel allocates physical memory to store the -mapping's *private copy* of the file's contents, copies file contents to the -allocated memory, and changes the PTE to map to the private copy. Supposing that -the kernel chooses the physical memory at physical address (PA) 0x5ea000, the -resulting state of the system is: - - VMA: VA:0x400000 -> /tmp/foo:0x0 (private) - Filemap: /tmp/foo:0x0 -> PA:0x2fb000 - PTE: VA:0x400000 -----------------> PA:0x5ea000 - -Note that the filemap mapping from /tmp/foo:0x0 to PA:0x2fb000 may still exist, -but is now irrelevant to this mapping. - -[^mmap-dev-zero]: Modulo files with special mmap semantics such as `/dev/zero`. - -## Anonymous Mappings - -Instead of passing a file to the `mmap` system call, applications can instead -request an *anonymous* mapping by passing the `MAP_ANONYMOUS` flag. -Semantically, an anonymous mapping is essentially a mapping to an ephemeral file -initially filled with zero bytes. Practically speaking, this is how shared -anonymous mappings are implemented, but private anonymous mappings do not result -in the creation of an ephemeral file; since there would be no way to modify the -contents of the underlying file through a private mapping, all private anonymous -mappings use a single shared page filled with zero bytes until copy-on-write -occurs. - -# Virtual Memory in the Sentry - -The sentry implements application virtual memory atop a host kernel, introducing -an additional level of indirection to the above. - -Consider the same scenario as in the previous section. Since the sentry handles -application system calls, the effect of an application `mmap` system call is to -create a VMA in the sentry (as opposed to the host kernel): - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 - -When the application first incurs a page fault on this address, the host kernel -delivers information about the page fault to the sentry in a platform-dependent -manner, and the sentry handles the fault: - -1. The sentry allocates memory to store the contents of the required part of - the file, and copies file contents to the allocated memory. However, since - the sentry is implemented atop a host kernel, it does not configure mappings - to physical memory directly. Instead, mappable "memory" in the sentry is - represented by a host file descriptor and offset, since (as noted in - "Background") this is the memory mapping primitive provided by the host - kernel. In general, memory is allocated from a temporary host file using the - `pgalloc` package. Supposing that the sentry allocates offset 0x3000 from - host file "memory-file", the resulting state is: - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 - Sentry filemap: /tmp/foo:0x0 -> host:memory-file:0x3000 - -2. The sentry stores the effective mapping from virtual address to host file in - a host VMA by invoking the `mmap` system call: - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 - Sentry filemap: /tmp/foo:0x0 -> host:memory-file:0x3000 - Host VMA: VA:0x400000 -----------------> host:memory-file:0x3000 - -3. The sentry returns control to the application, which immediately incurs the - page fault again.[^mmap-populate] However, since a host VMA now exists for - the faulting virtual address, the host kernel now handles the page fault as - described in "Background": - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 - Sentry filemap: /tmp/foo:0x0 -> host:memory-file:0x3000 - Host VMA: VA:0x400000 -----------------> host:memory-file:0x3000 - Host filemap: host:memory-file:0x3000 -> PA:0x2fb000 - Host PTE: VA:0x400000 --------------------------------------------> PA:0x2fb000 - -Thus, from an implementation standpoint, host VMAs serve the same purpose in the -sentry that PTEs do in Linux. As in Linux, sentry VMA and filemap state is -independently mutable, and the desired state of host VMAs is derived from that -state. - -[^mmap-populate]: The sentry could force the host kernel to establish PTEs when - it creates the host VMA by passing the `MAP_POPULATE` flag to - the `mmap` system call, but usually does not. This is because, - to reduce the number of page faults that require handling by - the sentry and (correspondingly) the number of host `mmap` - system calls, the sentry usually creates host VMAs that are - much larger than the single faulting page. - -## Private Mappings - -The sentry implements private mappings consistently with Linux. Before -copy-on-write, the private mapping example given in the Background results in: - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 (private) - Sentry filemap: /tmp/foo:0x0 -> host:memory-file:0x3000 - Host VMA: VA:0x400000 -----------------> host:memory-file:0x3000 (read-only) - Host filemap: host:memory-file:0x3000 -> PA:0x2fb000 - Host PTE: VA:0x400000 --------------------------------------------> PA:0x2fb000 (read-only) - -When the application attempts to write to this address, the host kernel delivers -information about the resulting page fault to the sentry. Analogous to Linux, -the sentry allocates memory to store the mapping's private copy of the file's -contents, copies file contents to the allocated memory, and changes the host VMA -to map to the private copy. Supposing that the sentry chooses the offset 0x4000 -in host file `memory-file` to store the private copy, the state of the system -after copy-on-write is: - - Sentry VMA: VA:0x400000 -> /tmp/foo:0x0 (private) - Sentry filemap: /tmp/foo:0x0 -> host:memory-file:0x3000 - Host VMA: VA:0x400000 -----------------> host:memory-file:0x4000 - Host filemap: host:memory-file:0x4000 -> PA:0x5ea000 - Host PTE: VA:0x400000 --------------------------------------------> PA:0x5ea000 - -However, this highlights an important difference between Linux and the sentry. -In Linux, page tables are concrete (architecture-dependent) data structures -owned by the kernel. Conversely, the sentry has the ability to create and -destroy host VMAs using host system calls, but it does not have direct access to -their state. Thus, as written, if the application invokes the `munmap` system -call to remove the sentry VMA, it is non-trivial for the sentry to determine -that it should deallocate `host:memory-file:0x4000`. This implies that the -sentry must retain information about the host VMAs that it has created. - -## Anonymous Mappings - -The sentry implements anonymous mappings consistently with Linux, except that -there is no shared zero page. - -# Implementation Constructs - -In Linux: - -- A virtual address space is represented by `struct mm_struct`. - -- VMAs are represented by `struct vm_area_struct`, stored in `struct - mm_struct::mmap`. - -- Mappings from file offsets to physical memory are stored in `struct - address_space`. - -- Reverse mappings from file offsets to virtual mappings are stored in `struct - address_space::i_mmap`. - -- Physical memory pages are represented by a pointer to `struct page` or an - index called a *page frame number* (PFN), represented by `pfn_t`. - -- PTEs are represented by architecture-dependent type `pte_t`, stored in a - table hierarchy rooted at `struct mm_struct::pgd`. - -In the sentry: - -- A virtual address space is represented by type [`mm.MemoryManager`][mm]. - -- Sentry VMAs are represented by type [`mm.vma`][mm], stored in - `mm.MemoryManager.vmas`. - -- Mappings from sentry file offsets to host file offsets are abstracted - through interface method [`memmap.Mappable.Translate`][memmap]. - -- Reverse mappings from sentry file offsets to virtual mappings are abstracted - through interface methods - [`memmap.Mappable.AddMapping` and `memmap.Mappable.RemoveMapping`][memmap]. - -- Host files that may be mapped into host VMAs are represented by type - [`platform.File`][platform]. - -- Host VMAs are represented in the sentry by type [`mm.pma`][mm] ("platform - mapping area"), stored in `mm.MemoryManager.pmas`. - -- Creation and destruction of host VMAs is abstracted through interface - methods - [`platform.AddressSpace.MapFile` and `platform.AddressSpace.Unmap`][platform]. - -[memmap]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/memmap/memmap.go -[mm]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/mm/mm.go -[pgalloc]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/pgalloc/pgalloc.go -[platform]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/platform/platform.go diff --git a/pkg/sentry/mm/file_refcount_set.go b/pkg/sentry/mm/file_refcount_set.go new file mode 100755 index 000000000..6b3081009 --- /dev/null +++ b/pkg/sentry/mm/file_refcount_set.go @@ -0,0 +1,1274 @@ +package mm + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/platform" +) + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + fileRefcountminDegree = 3 + + fileRefcountmaxDegree = 2 * fileRefcountminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type fileRefcountSet struct { + root fileRefcountnode `state:".(*fileRefcountSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *fileRefcountSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *fileRefcountSet) IsEmptyRange(r __generics_imported0.FileRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *fileRefcountSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *fileRefcountSet) SpanRange(r __generics_imported0.FileRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *fileRefcountSet) FirstSegment() fileRefcountIterator { + if s.root.nrSegments == 0 { + return fileRefcountIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *fileRefcountSet) LastSegment() fileRefcountIterator { + if s.root.nrSegments == 0 { + return fileRefcountIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *fileRefcountSet) FirstGap() fileRefcountGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return fileRefcountGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *fileRefcountSet) LastGap() fileRefcountGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return fileRefcountGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *fileRefcountSet) Find(key uint64) (fileRefcountIterator, fileRefcountGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return fileRefcountIterator{n, i}, fileRefcountGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return fileRefcountIterator{}, fileRefcountGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *fileRefcountSet) FindSegment(key uint64) fileRefcountIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *fileRefcountSet) LowerBoundSegment(min uint64) fileRefcountIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *fileRefcountSet) UpperBoundSegment(max uint64) fileRefcountIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *fileRefcountSet) FindGap(key uint64) fileRefcountGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *fileRefcountSet) LowerBoundGap(min uint64) fileRefcountGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *fileRefcountSet) UpperBoundGap(max uint64) fileRefcountGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *fileRefcountSet) Add(r __generics_imported0.FileRange, val int32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *fileRefcountSet) AddWithoutMerging(r __generics_imported0.FileRange, val int32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *fileRefcountSet) Insert(gap fileRefcountGapIterator, r __generics_imported0.FileRange, val int32) fileRefcountIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (fileRefcountSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (fileRefcountSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (fileRefcountSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *fileRefcountSet) InsertWithoutMerging(gap fileRefcountGapIterator, r __generics_imported0.FileRange, val int32) fileRefcountIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *fileRefcountSet) InsertWithoutMergingUnchecked(gap fileRefcountGapIterator, r __generics_imported0.FileRange, val int32) fileRefcountIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return fileRefcountIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *fileRefcountSet) Remove(seg fileRefcountIterator) fileRefcountGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + fileRefcountSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(fileRefcountGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *fileRefcountSet) RemoveAll() { + s.root = fileRefcountnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *fileRefcountSet) RemoveRange(r __generics_imported0.FileRange) fileRefcountGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *fileRefcountSet) Merge(first, second fileRefcountIterator) fileRefcountIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *fileRefcountSet) MergeUnchecked(first, second fileRefcountIterator) fileRefcountIterator { + if first.End() == second.Start() { + if mval, ok := (fileRefcountSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return fileRefcountIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *fileRefcountSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *fileRefcountSet) MergeRange(r __generics_imported0.FileRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *fileRefcountSet) MergeAdjacent(r __generics_imported0.FileRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *fileRefcountSet) Split(seg fileRefcountIterator, split uint64) (fileRefcountIterator, fileRefcountIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *fileRefcountSet) SplitUnchecked(seg fileRefcountIterator, split uint64) (fileRefcountIterator, fileRefcountIterator) { + val1, val2 := (fileRefcountSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.FileRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *fileRefcountSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *fileRefcountSet) Isolate(seg fileRefcountIterator, r __generics_imported0.FileRange) fileRefcountIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *fileRefcountSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg fileRefcountIterator)) fileRefcountGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return fileRefcountGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return fileRefcountGapIterator{} + } + } +} + +// +stateify savable +type fileRefcountnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *fileRefcountnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [fileRefcountmaxDegree - 1]__generics_imported0.FileRange + values [fileRefcountmaxDegree - 1]int32 + children [fileRefcountmaxDegree]*fileRefcountnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *fileRefcountnode) firstSegment() fileRefcountIterator { + for n.hasChildren { + n = n.children[0] + } + return fileRefcountIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *fileRefcountnode) lastSegment() fileRefcountIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return fileRefcountIterator{n, n.nrSegments - 1} +} + +func (n *fileRefcountnode) prevSibling() *fileRefcountnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *fileRefcountnode) nextSibling() *fileRefcountnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *fileRefcountnode) rebalanceBeforeInsert(gap fileRefcountGapIterator) fileRefcountGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < fileRefcountmaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &fileRefcountnode{ + nrSegments: fileRefcountminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &fileRefcountnode{ + nrSegments: fileRefcountminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:fileRefcountminDegree-1], n.keys[:fileRefcountminDegree-1]) + copy(left.values[:fileRefcountminDegree-1], n.values[:fileRefcountminDegree-1]) + copy(right.keys[:fileRefcountminDegree-1], n.keys[fileRefcountminDegree:]) + copy(right.values[:fileRefcountminDegree-1], n.values[fileRefcountminDegree:]) + n.keys[0], n.values[0] = n.keys[fileRefcountminDegree-1], n.values[fileRefcountminDegree-1] + fileRefcountzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:fileRefcountminDegree], n.children[:fileRefcountminDegree]) + copy(right.children[:fileRefcountminDegree], n.children[fileRefcountminDegree:]) + fileRefcountzeroNodeSlice(n.children[2:]) + for i := 0; i < fileRefcountminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < fileRefcountminDegree { + return fileRefcountGapIterator{left, gap.index} + } + return fileRefcountGapIterator{right, gap.index - fileRefcountminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[fileRefcountminDegree-1], n.values[fileRefcountminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &fileRefcountnode{ + nrSegments: fileRefcountminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:fileRefcountminDegree-1], n.keys[fileRefcountminDegree:]) + copy(sibling.values[:fileRefcountminDegree-1], n.values[fileRefcountminDegree:]) + fileRefcountzeroValueSlice(n.values[fileRefcountminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:fileRefcountminDegree], n.children[fileRefcountminDegree:]) + fileRefcountzeroNodeSlice(n.children[fileRefcountminDegree:]) + for i := 0; i < fileRefcountminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = fileRefcountminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < fileRefcountminDegree { + return gap + } + return fileRefcountGapIterator{sibling, gap.index - fileRefcountminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *fileRefcountnode) rebalanceAfterRemove(gap fileRefcountGapIterator) fileRefcountGapIterator { + for { + if n.nrSegments >= fileRefcountminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= fileRefcountminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + fileRefcountSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return fileRefcountGapIterator{n, 0} + } + if gap.node == n { + return fileRefcountGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= fileRefcountminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + fileRefcountSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return fileRefcountGapIterator{n, n.nrSegments} + } + return fileRefcountGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return fileRefcountGapIterator{p, gap.index} + } + if gap.node == right { + return fileRefcountGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *fileRefcountnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = fileRefcountGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + fileRefcountSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type fileRefcountIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *fileRefcountnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg fileRefcountIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg fileRefcountIterator) Range() __generics_imported0.FileRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg fileRefcountIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg fileRefcountIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg fileRefcountIterator) SetRangeUnchecked(r __generics_imported0.FileRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg fileRefcountIterator) SetRange(r __generics_imported0.FileRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg fileRefcountIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg fileRefcountIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg fileRefcountIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg fileRefcountIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg fileRefcountIterator) Value() int32 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg fileRefcountIterator) ValuePtr() *int32 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg fileRefcountIterator) SetValue(val int32) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg fileRefcountIterator) PrevSegment() fileRefcountIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return fileRefcountIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return fileRefcountIterator{} + } + return fileRefcountsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg fileRefcountIterator) NextSegment() fileRefcountIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return fileRefcountIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return fileRefcountIterator{} + } + return fileRefcountsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg fileRefcountIterator) PrevGap() fileRefcountGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return fileRefcountGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg fileRefcountIterator) NextGap() fileRefcountGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return fileRefcountGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg fileRefcountIterator) PrevNonEmpty() (fileRefcountIterator, fileRefcountGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return fileRefcountIterator{}, gap + } + return gap.PrevSegment(), fileRefcountGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg fileRefcountIterator) NextNonEmpty() (fileRefcountIterator, fileRefcountGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return fileRefcountIterator{}, gap + } + return gap.NextSegment(), fileRefcountGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type fileRefcountGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *fileRefcountnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap fileRefcountGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap fileRefcountGapIterator) Range() __generics_imported0.FileRange { + return __generics_imported0.FileRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap fileRefcountGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return fileRefcountSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap fileRefcountGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return fileRefcountSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap fileRefcountGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap fileRefcountGapIterator) PrevSegment() fileRefcountIterator { + return fileRefcountsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap fileRefcountGapIterator) NextSegment() fileRefcountIterator { + return fileRefcountsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap fileRefcountGapIterator) PrevGap() fileRefcountGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return fileRefcountGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap fileRefcountGapIterator) NextGap() fileRefcountGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return fileRefcountGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func fileRefcountsegmentBeforePosition(n *fileRefcountnode, i int) fileRefcountIterator { + for i == 0 { + if n.parent == nil { + return fileRefcountIterator{} + } + n, i = n.parent, n.parentIndex + } + return fileRefcountIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func fileRefcountsegmentAfterPosition(n *fileRefcountnode, i int) fileRefcountIterator { + for i == n.nrSegments { + if n.parent == nil { + return fileRefcountIterator{} + } + n, i = n.parent, n.parentIndex + } + return fileRefcountIterator{n, i} +} + +func fileRefcountzeroValueSlice(slice []int32) { + + for i := range slice { + fileRefcountSetFunctions{}.ClearValue(&slice[i]) + } +} + +func fileRefcountzeroNodeSlice(slice []*fileRefcountnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *fileRefcountSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *fileRefcountnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *fileRefcountnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type fileRefcountSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []int32 +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *fileRefcountSet) ExportSortedSlices() *fileRefcountSegmentDataSlices { + var sds fileRefcountSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *fileRefcountSet) ImportSortedSlices(sds *fileRefcountSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.FileRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *fileRefcountSet) saveRoot() *fileRefcountSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *fileRefcountSet) loadRoot(sds *fileRefcountSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/mm/io_list.go b/pkg/sentry/mm/io_list.go new file mode 100755 index 000000000..99c83c4b9 --- /dev/null +++ b/pkg/sentry/mm/io_list.go @@ -0,0 +1,173 @@ +package mm + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type ioElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (ioElementMapper) linkerFor(elem *ioResult) *ioResult { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type ioList struct { + head *ioResult + tail *ioResult +} + +// Reset resets list l to the empty state. +func (l *ioList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *ioList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *ioList) Front() *ioResult { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *ioList) Back() *ioResult { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *ioList) PushFront(e *ioResult) { + ioElementMapper{}.linkerFor(e).SetNext(l.head) + ioElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + ioElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *ioList) PushBack(e *ioResult) { + ioElementMapper{}.linkerFor(e).SetNext(nil) + ioElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + ioElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *ioList) PushBackList(m *ioList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + ioElementMapper{}.linkerFor(l.tail).SetNext(m.head) + ioElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *ioList) InsertAfter(b, e *ioResult) { + a := ioElementMapper{}.linkerFor(b).Next() + ioElementMapper{}.linkerFor(e).SetNext(a) + ioElementMapper{}.linkerFor(e).SetPrev(b) + ioElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + ioElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *ioList) InsertBefore(a, e *ioResult) { + b := ioElementMapper{}.linkerFor(a).Prev() + ioElementMapper{}.linkerFor(e).SetNext(a) + ioElementMapper{}.linkerFor(e).SetPrev(b) + ioElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + ioElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *ioList) Remove(e *ioResult) { + prev := ioElementMapper{}.linkerFor(e).Prev() + next := ioElementMapper{}.linkerFor(e).Next() + + if prev != nil { + ioElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + ioElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type ioEntry struct { + next *ioResult + prev *ioResult +} + +// Next returns the entry that follows e in the list. +func (e *ioEntry) Next() *ioResult { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *ioEntry) Prev() *ioResult { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *ioEntry) SetNext(elem *ioResult) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *ioEntry) SetPrev(elem *ioResult) { + e.prev = elem +} diff --git a/pkg/sentry/mm/mm_state_autogen.go b/pkg/sentry/mm/mm_state_autogen.go new file mode 100755 index 000000000..3b5af2401 --- /dev/null +++ b/pkg/sentry/mm/mm_state_autogen.go @@ -0,0 +1,388 @@ +// automatically generated by stateify. + +package mm + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *aioManager) beforeSave() {} +func (x *aioManager) save(m state.Map) { + x.beforeSave() + m.Save("contexts", &x.contexts) +} + +func (x *aioManager) afterLoad() {} +func (x *aioManager) load(m state.Map) { + m.Load("contexts", &x.contexts) +} + +func (x *ioResult) beforeSave() {} +func (x *ioResult) save(m state.Map) { + x.beforeSave() + m.Save("data", &x.data) + m.Save("ioEntry", &x.ioEntry) +} + +func (x *ioResult) afterLoad() {} +func (x *ioResult) load(m state.Map) { + m.Load("data", &x.data) + m.Load("ioEntry", &x.ioEntry) +} + +func (x *AIOContext) beforeSave() {} +func (x *AIOContext) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.dead) { m.Failf("dead is %v, expected zero", x.dead) } + m.Save("results", &x.results) + m.Save("maxOutstanding", &x.maxOutstanding) + m.Save("outstanding", &x.outstanding) +} + +func (x *AIOContext) load(m state.Map) { + m.Load("results", &x.results) + m.Load("maxOutstanding", &x.maxOutstanding) + m.Load("outstanding", &x.outstanding) + m.AfterLoad(x.afterLoad) +} + +func (x *aioMappable) beforeSave() {} +func (x *aioMappable) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("mfp", &x.mfp) + m.Save("fr", &x.fr) +} + +func (x *aioMappable) afterLoad() {} +func (x *aioMappable) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("mfp", &x.mfp) + m.Load("fr", &x.fr) +} + +func (x *fileRefcountSet) beforeSave() {} +func (x *fileRefcountSet) save(m state.Map) { + x.beforeSave() + var root *fileRefcountSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *fileRefcountSet) afterLoad() {} +func (x *fileRefcountSet) load(m state.Map) { + m.LoadValue("root", new(*fileRefcountSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*fileRefcountSegmentDataSlices)) }) +} + +func (x *fileRefcountnode) beforeSave() {} +func (x *fileRefcountnode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *fileRefcountnode) afterLoad() {} +func (x *fileRefcountnode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *fileRefcountSegmentDataSlices) beforeSave() {} +func (x *fileRefcountSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *fileRefcountSegmentDataSlices) afterLoad() {} +func (x *fileRefcountSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func (x *ioList) beforeSave() {} +func (x *ioList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *ioList) afterLoad() {} +func (x *ioList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *ioEntry) beforeSave() {} +func (x *ioEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *ioEntry) afterLoad() {} +func (x *ioEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func (x *MemoryManager) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.active) { m.Failf("active is %v, expected zero", x.active) } + if !state.IsZeroValue(x.captureInvalidations) { m.Failf("captureInvalidations is %v, expected zero", x.captureInvalidations) } + m.Save("p", &x.p) + m.Save("mfp", &x.mfp) + m.Save("layout", &x.layout) + m.Save("privateRefs", &x.privateRefs) + m.Save("users", &x.users) + m.Save("vmas", &x.vmas) + m.Save("brk", &x.brk) + m.Save("usageAS", &x.usageAS) + m.Save("lockedAS", &x.lockedAS) + m.Save("dataAS", &x.dataAS) + m.Save("defMLockMode", &x.defMLockMode) + m.Save("pmas", &x.pmas) + m.Save("curRSS", &x.curRSS) + m.Save("maxRSS", &x.maxRSS) + m.Save("argv", &x.argv) + m.Save("envv", &x.envv) + m.Save("auxv", &x.auxv) + m.Save("executable", &x.executable) + m.Save("dumpability", &x.dumpability) + m.Save("aioManager", &x.aioManager) +} + +func (x *MemoryManager) load(m state.Map) { + m.Load("p", &x.p) + m.Load("mfp", &x.mfp) + m.Load("layout", &x.layout) + m.Load("privateRefs", &x.privateRefs) + m.Load("users", &x.users) + m.Load("vmas", &x.vmas) + m.Load("brk", &x.brk) + m.Load("usageAS", &x.usageAS) + m.Load("lockedAS", &x.lockedAS) + m.Load("dataAS", &x.dataAS) + m.Load("defMLockMode", &x.defMLockMode) + m.Load("pmas", &x.pmas) + m.Load("curRSS", &x.curRSS) + m.Load("maxRSS", &x.maxRSS) + m.Load("argv", &x.argv) + m.Load("envv", &x.envv) + m.Load("auxv", &x.auxv) + m.Load("executable", &x.executable) + m.Load("dumpability", &x.dumpability) + m.Load("aioManager", &x.aioManager) + m.AfterLoad(x.afterLoad) +} + +func (x *vma) beforeSave() {} +func (x *vma) save(m state.Map) { + x.beforeSave() + var realPerms int = x.saveRealPerms() + m.SaveValue("realPerms", realPerms) + m.Save("mappable", &x.mappable) + m.Save("off", &x.off) + m.Save("dontfork", &x.dontfork) + m.Save("mlockMode", &x.mlockMode) + m.Save("numaPolicy", &x.numaPolicy) + m.Save("numaNodemask", &x.numaNodemask) + m.Save("id", &x.id) + m.Save("hint", &x.hint) +} + +func (x *vma) afterLoad() {} +func (x *vma) load(m state.Map) { + m.Load("mappable", &x.mappable) + m.Load("off", &x.off) + m.Load("dontfork", &x.dontfork) + m.Load("mlockMode", &x.mlockMode) + m.Load("numaPolicy", &x.numaPolicy) + m.Load("numaNodemask", &x.numaNodemask) + m.Load("id", &x.id) + m.Load("hint", &x.hint) + m.LoadValue("realPerms", new(int), func(y interface{}) { x.loadRealPerms(y.(int)) }) +} + +func (x *pma) beforeSave() {} +func (x *pma) save(m state.Map) { + x.beforeSave() + m.Save("off", &x.off) + m.Save("translatePerms", &x.translatePerms) + m.Save("effectivePerms", &x.effectivePerms) + m.Save("maxPerms", &x.maxPerms) + m.Save("needCOW", &x.needCOW) + m.Save("private", &x.private) +} + +func (x *pma) afterLoad() {} +func (x *pma) load(m state.Map) { + m.Load("off", &x.off) + m.Load("translatePerms", &x.translatePerms) + m.Load("effectivePerms", &x.effectivePerms) + m.Load("maxPerms", &x.maxPerms) + m.Load("needCOW", &x.needCOW) + m.Load("private", &x.private) +} + +func (x *privateRefs) beforeSave() {} +func (x *privateRefs) save(m state.Map) { + x.beforeSave() + m.Save("refs", &x.refs) +} + +func (x *privateRefs) afterLoad() {} +func (x *privateRefs) load(m state.Map) { + m.Load("refs", &x.refs) +} + +func (x *pmaSet) beforeSave() {} +func (x *pmaSet) save(m state.Map) { + x.beforeSave() + var root *pmaSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *pmaSet) afterLoad() {} +func (x *pmaSet) load(m state.Map) { + m.LoadValue("root", new(*pmaSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*pmaSegmentDataSlices)) }) +} + +func (x *pmanode) beforeSave() {} +func (x *pmanode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *pmanode) afterLoad() {} +func (x *pmanode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *pmaSegmentDataSlices) beforeSave() {} +func (x *pmaSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *pmaSegmentDataSlices) afterLoad() {} +func (x *pmaSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func (x *SpecialMappable) beforeSave() {} +func (x *SpecialMappable) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("mfp", &x.mfp) + m.Save("fr", &x.fr) + m.Save("name", &x.name) +} + +func (x *SpecialMappable) afterLoad() {} +func (x *SpecialMappable) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("mfp", &x.mfp) + m.Load("fr", &x.fr) + m.Load("name", &x.name) +} + +func (x *vmaSet) beforeSave() {} +func (x *vmaSet) save(m state.Map) { + x.beforeSave() + var root *vmaSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *vmaSet) afterLoad() {} +func (x *vmaSet) load(m state.Map) { + m.LoadValue("root", new(*vmaSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*vmaSegmentDataSlices)) }) +} + +func (x *vmanode) beforeSave() {} +func (x *vmanode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *vmanode) afterLoad() {} +func (x *vmanode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *vmaSegmentDataSlices) beforeSave() {} +func (x *vmaSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *vmaSegmentDataSlices) afterLoad() {} +func (x *vmaSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func init() { + state.Register("mm.aioManager", (*aioManager)(nil), state.Fns{Save: (*aioManager).save, Load: (*aioManager).load}) + state.Register("mm.ioResult", (*ioResult)(nil), state.Fns{Save: (*ioResult).save, Load: (*ioResult).load}) + state.Register("mm.AIOContext", (*AIOContext)(nil), state.Fns{Save: (*AIOContext).save, Load: (*AIOContext).load}) + state.Register("mm.aioMappable", (*aioMappable)(nil), state.Fns{Save: (*aioMappable).save, Load: (*aioMappable).load}) + state.Register("mm.fileRefcountSet", (*fileRefcountSet)(nil), state.Fns{Save: (*fileRefcountSet).save, Load: (*fileRefcountSet).load}) + state.Register("mm.fileRefcountnode", (*fileRefcountnode)(nil), state.Fns{Save: (*fileRefcountnode).save, Load: (*fileRefcountnode).load}) + state.Register("mm.fileRefcountSegmentDataSlices", (*fileRefcountSegmentDataSlices)(nil), state.Fns{Save: (*fileRefcountSegmentDataSlices).save, Load: (*fileRefcountSegmentDataSlices).load}) + state.Register("mm.ioList", (*ioList)(nil), state.Fns{Save: (*ioList).save, Load: (*ioList).load}) + state.Register("mm.ioEntry", (*ioEntry)(nil), state.Fns{Save: (*ioEntry).save, Load: (*ioEntry).load}) + state.Register("mm.MemoryManager", (*MemoryManager)(nil), state.Fns{Save: (*MemoryManager).save, Load: (*MemoryManager).load}) + state.Register("mm.vma", (*vma)(nil), state.Fns{Save: (*vma).save, Load: (*vma).load}) + state.Register("mm.pma", (*pma)(nil), state.Fns{Save: (*pma).save, Load: (*pma).load}) + state.Register("mm.privateRefs", (*privateRefs)(nil), state.Fns{Save: (*privateRefs).save, Load: (*privateRefs).load}) + state.Register("mm.pmaSet", (*pmaSet)(nil), state.Fns{Save: (*pmaSet).save, Load: (*pmaSet).load}) + state.Register("mm.pmanode", (*pmanode)(nil), state.Fns{Save: (*pmanode).save, Load: (*pmanode).load}) + state.Register("mm.pmaSegmentDataSlices", (*pmaSegmentDataSlices)(nil), state.Fns{Save: (*pmaSegmentDataSlices).save, Load: (*pmaSegmentDataSlices).load}) + state.Register("mm.SpecialMappable", (*SpecialMappable)(nil), state.Fns{Save: (*SpecialMappable).save, Load: (*SpecialMappable).load}) + state.Register("mm.vmaSet", (*vmaSet)(nil), state.Fns{Save: (*vmaSet).save, Load: (*vmaSet).load}) + state.Register("mm.vmanode", (*vmanode)(nil), state.Fns{Save: (*vmanode).save, Load: (*vmanode).load}) + state.Register("mm.vmaSegmentDataSlices", (*vmaSegmentDataSlices)(nil), state.Fns{Save: (*vmaSegmentDataSlices).save, Load: (*vmaSegmentDataSlices).load}) +} diff --git a/pkg/sentry/mm/mm_test.go b/pkg/sentry/mm/mm_test.go deleted file mode 100644 index 4d2bfaaed..000000000 --- a/pkg/sentry/mm/mm_test.go +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mm - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserror" -) - -func testMemoryManager(ctx context.Context) *MemoryManager { - p := platform.FromContext(ctx) - mfp := pgalloc.MemoryFileProviderFromContext(ctx) - mm := NewMemoryManager(p, mfp) - mm.layout = arch.MmapLayout{ - MinAddr: p.MinUserAddress(), - MaxAddr: p.MaxUserAddress(), - BottomUpBase: p.MinUserAddress(), - TopDownBase: p.MaxUserAddress(), - } - return mm -} - -func (mm *MemoryManager) realUsageAS() uint64 { - return uint64(mm.vmas.Span()) -} - -func TestUsageASUpdates(t *testing.T) { - ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: 2 * usermem.PageSize, - }) - if err != nil { - t.Fatalf("MMap got err %v want nil", err) - } - realUsage := mm.realUsageAS() - if mm.usageAS != realUsage { - t.Fatalf("usageAS believes %v bytes are mapped; %v bytes are actually mapped", mm.usageAS, realUsage) - } - - mm.MUnmap(ctx, addr, usermem.PageSize) - realUsage = mm.realUsageAS() - if mm.usageAS != realUsage { - t.Fatalf("usageAS believes %v bytes are mapped; %v bytes are actually mapped", mm.usageAS, realUsage) - } -} - -func (mm *MemoryManager) realDataAS() uint64 { - var sz uint64 - for seg := mm.vmas.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - vma := seg.Value() - if vma.isPrivateDataLocked() { - sz += uint64(seg.Range().Length()) - } - } - return sz -} - -func TestDataASUpdates(t *testing.T) { - ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: 3 * usermem.PageSize, - Private: true, - Perms: usermem.Write, - MaxPerms: usermem.AnyAccess, - }) - if err != nil { - t.Fatalf("MMap got err %v want nil", err) - } - if mm.dataAS == 0 { - t.Fatalf("dataAS is 0, wanted not 0") - } - realDataAS := mm.realDataAS() - if mm.dataAS != realDataAS { - t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) - } - - mm.MUnmap(ctx, addr, usermem.PageSize) - realDataAS = mm.realDataAS() - if mm.dataAS != realDataAS { - t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) - } - - mm.MProtect(addr+usermem.PageSize, usermem.PageSize, usermem.Read, false) - realDataAS = mm.realDataAS() - if mm.dataAS != realDataAS { - t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) - } - - mm.MRemap(ctx, addr+2*usermem.PageSize, usermem.PageSize, 2*usermem.PageSize, MRemapOpts{ - Move: MRemapMayMove, - }) - realDataAS = mm.realDataAS() - if mm.dataAS != realDataAS { - t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) - } -} - -func TestBrkDataLimitUpdates(t *testing.T) { - limitSet := limits.NewLimitSet() - limitSet.Set(limits.Data, limits.Limit{}, true /* privileged */) // zero RLIMIT_DATA - - ctx := contexttest.WithLimitSet(contexttest.Context(t), limitSet) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - // Try to extend the brk by one page and expect doing so to fail. - oldBrk, _ := mm.Brk(ctx, 0) - if newBrk, _ := mm.Brk(ctx, oldBrk+usermem.PageSize); newBrk != oldBrk { - t.Errorf("brk() increased data segment above RLIMIT_DATA (old brk = %#x, new brk = %#x", oldBrk, newBrk) - } -} - -// TestIOAfterUnmap ensures that IO fails after unmap. -func TestIOAfterUnmap(t *testing.T) { - ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: usermem.PageSize, - Private: true, - Perms: usermem.Read, - MaxPerms: usermem.AnyAccess, - }) - if err != nil { - t.Fatalf("MMap got err %v want nil", err) - } - - // IO works before munmap. - b := make([]byte, 1) - n, err := mm.CopyIn(ctx, addr, b, usermem.IOOpts{}) - if err != nil { - t.Errorf("CopyIn got err %v want nil", err) - } - if n != 1 { - t.Errorf("CopyIn got %d want 1", n) - } - - err = mm.MUnmap(ctx, addr, usermem.PageSize) - if err != nil { - t.Fatalf("MUnmap got err %v want nil", err) - } - - n, err = mm.CopyIn(ctx, addr, b, usermem.IOOpts{}) - if err != syserror.EFAULT { - t.Errorf("CopyIn got err %v want EFAULT", err) - } - if n != 0 { - t.Errorf("CopyIn got %d want 0", n) - } -} - -// TestIOAfterMProtect tests IO interaction with mprotect permissions. -func TestIOAfterMProtect(t *testing.T) { - ctx := contexttest.Context(t) - mm := testMemoryManager(ctx) - defer mm.DecUsers(ctx) - - addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: usermem.PageSize, - Private: true, - Perms: usermem.ReadWrite, - MaxPerms: usermem.AnyAccess, - }) - if err != nil { - t.Fatalf("MMap got err %v want nil", err) - } - - // Writing works before mprotect. - b := make([]byte, 1) - n, err := mm.CopyOut(ctx, addr, b, usermem.IOOpts{}) - if err != nil { - t.Errorf("CopyOut got err %v want nil", err) - } - if n != 1 { - t.Errorf("CopyOut got %d want 1", n) - } - - err = mm.MProtect(addr, usermem.PageSize, usermem.Read, false) - if err != nil { - t.Errorf("MProtect got err %v want nil", err) - } - - // Without IgnorePermissions, CopyOut should no longer succeed. - n, err = mm.CopyOut(ctx, addr, b, usermem.IOOpts{}) - if err != syserror.EFAULT { - t.Errorf("CopyOut got err %v want EFAULT", err) - } - if n != 0 { - t.Errorf("CopyOut got %d want 0", n) - } - - // With IgnorePermissions, CopyOut should succeed despite mprotect. - n, err = mm.CopyOut(ctx, addr, b, usermem.IOOpts{ - IgnorePermissions: true, - }) - if err != nil { - t.Errorf("CopyOut got err %v want nil", err) - } - if n != 1 { - t.Errorf("CopyOut got %d want 1", n) - } -} diff --git a/pkg/sentry/mm/pma_set.go b/pkg/sentry/mm/pma_set.go new file mode 100755 index 000000000..52e66468c --- /dev/null +++ b/pkg/sentry/mm/pma_set.go @@ -0,0 +1,1274 @@ +package mm + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/usermem" +) + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + pmaminDegree = 8 + + pmamaxDegree = 2 * pmaminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type pmaSet struct { + root pmanode `state:".(*pmaSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *pmaSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *pmaSet) IsEmptyRange(r __generics_imported0.AddrRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *pmaSet) Span() __generics_imported0.Addr { + var sz __generics_imported0.Addr + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *pmaSet) SpanRange(r __generics_imported0.AddrRange) __generics_imported0.Addr { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz __generics_imported0.Addr + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *pmaSet) FirstSegment() pmaIterator { + if s.root.nrSegments == 0 { + return pmaIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *pmaSet) LastSegment() pmaIterator { + if s.root.nrSegments == 0 { + return pmaIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *pmaSet) FirstGap() pmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return pmaGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *pmaSet) LastGap() pmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return pmaGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *pmaSet) Find(key __generics_imported0.Addr) (pmaIterator, pmaGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return pmaIterator{n, i}, pmaGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return pmaIterator{}, pmaGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *pmaSet) FindSegment(key __generics_imported0.Addr) pmaIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *pmaSet) LowerBoundSegment(min __generics_imported0.Addr) pmaIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *pmaSet) UpperBoundSegment(max __generics_imported0.Addr) pmaIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *pmaSet) FindGap(key __generics_imported0.Addr) pmaGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *pmaSet) LowerBoundGap(min __generics_imported0.Addr) pmaGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *pmaSet) UpperBoundGap(max __generics_imported0.Addr) pmaGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *pmaSet) Add(r __generics_imported0.AddrRange, val pma) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *pmaSet) AddWithoutMerging(r __generics_imported0.AddrRange, val pma) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *pmaSet) Insert(gap pmaGapIterator, r __generics_imported0.AddrRange, val pma) pmaIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (pmaSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (pmaSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (pmaSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *pmaSet) InsertWithoutMerging(gap pmaGapIterator, r __generics_imported0.AddrRange, val pma) pmaIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *pmaSet) InsertWithoutMergingUnchecked(gap pmaGapIterator, r __generics_imported0.AddrRange, val pma) pmaIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return pmaIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *pmaSet) Remove(seg pmaIterator) pmaGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + pmaSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(pmaGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *pmaSet) RemoveAll() { + s.root = pmanode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *pmaSet) RemoveRange(r __generics_imported0.AddrRange) pmaGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *pmaSet) Merge(first, second pmaIterator) pmaIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *pmaSet) MergeUnchecked(first, second pmaIterator) pmaIterator { + if first.End() == second.Start() { + if mval, ok := (pmaSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return pmaIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *pmaSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *pmaSet) MergeRange(r __generics_imported0.AddrRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *pmaSet) MergeAdjacent(r __generics_imported0.AddrRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *pmaSet) Split(seg pmaIterator, split __generics_imported0.Addr) (pmaIterator, pmaIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *pmaSet) SplitUnchecked(seg pmaIterator, split __generics_imported0.Addr) (pmaIterator, pmaIterator) { + val1, val2 := (pmaSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.AddrRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *pmaSet) SplitAt(split __generics_imported0.Addr) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *pmaSet) Isolate(seg pmaIterator, r __generics_imported0.AddrRange) pmaIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *pmaSet) ApplyContiguous(r __generics_imported0.AddrRange, fn func(seg pmaIterator)) pmaGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return pmaGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return pmaGapIterator{} + } + } +} + +// +stateify savable +type pmanode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *pmanode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [pmamaxDegree - 1]__generics_imported0.AddrRange + values [pmamaxDegree - 1]pma + children [pmamaxDegree]*pmanode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *pmanode) firstSegment() pmaIterator { + for n.hasChildren { + n = n.children[0] + } + return pmaIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *pmanode) lastSegment() pmaIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return pmaIterator{n, n.nrSegments - 1} +} + +func (n *pmanode) prevSibling() *pmanode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *pmanode) nextSibling() *pmanode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *pmanode) rebalanceBeforeInsert(gap pmaGapIterator) pmaGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < pmamaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &pmanode{ + nrSegments: pmaminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &pmanode{ + nrSegments: pmaminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:pmaminDegree-1], n.keys[:pmaminDegree-1]) + copy(left.values[:pmaminDegree-1], n.values[:pmaminDegree-1]) + copy(right.keys[:pmaminDegree-1], n.keys[pmaminDegree:]) + copy(right.values[:pmaminDegree-1], n.values[pmaminDegree:]) + n.keys[0], n.values[0] = n.keys[pmaminDegree-1], n.values[pmaminDegree-1] + pmazeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:pmaminDegree], n.children[:pmaminDegree]) + copy(right.children[:pmaminDegree], n.children[pmaminDegree:]) + pmazeroNodeSlice(n.children[2:]) + for i := 0; i < pmaminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < pmaminDegree { + return pmaGapIterator{left, gap.index} + } + return pmaGapIterator{right, gap.index - pmaminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[pmaminDegree-1], n.values[pmaminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &pmanode{ + nrSegments: pmaminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:pmaminDegree-1], n.keys[pmaminDegree:]) + copy(sibling.values[:pmaminDegree-1], n.values[pmaminDegree:]) + pmazeroValueSlice(n.values[pmaminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:pmaminDegree], n.children[pmaminDegree:]) + pmazeroNodeSlice(n.children[pmaminDegree:]) + for i := 0; i < pmaminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = pmaminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < pmaminDegree { + return gap + } + return pmaGapIterator{sibling, gap.index - pmaminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *pmanode) rebalanceAfterRemove(gap pmaGapIterator) pmaGapIterator { + for { + if n.nrSegments >= pmaminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= pmaminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + pmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return pmaGapIterator{n, 0} + } + if gap.node == n { + return pmaGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= pmaminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + pmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return pmaGapIterator{n, n.nrSegments} + } + return pmaGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return pmaGapIterator{p, gap.index} + } + if gap.node == right { + return pmaGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *pmanode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = pmaGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + pmaSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type pmaIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *pmanode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg pmaIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg pmaIterator) Range() __generics_imported0.AddrRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg pmaIterator) Start() __generics_imported0.Addr { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg pmaIterator) End() __generics_imported0.Addr { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg pmaIterator) SetRangeUnchecked(r __generics_imported0.AddrRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg pmaIterator) SetRange(r __generics_imported0.AddrRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg pmaIterator) SetStartUnchecked(start __generics_imported0.Addr) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg pmaIterator) SetStart(start __generics_imported0.Addr) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg pmaIterator) SetEndUnchecked(end __generics_imported0.Addr) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg pmaIterator) SetEnd(end __generics_imported0.Addr) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg pmaIterator) Value() pma { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg pmaIterator) ValuePtr() *pma { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg pmaIterator) SetValue(val pma) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg pmaIterator) PrevSegment() pmaIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return pmaIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return pmaIterator{} + } + return pmasegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg pmaIterator) NextSegment() pmaIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return pmaIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return pmaIterator{} + } + return pmasegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg pmaIterator) PrevGap() pmaGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return pmaGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg pmaIterator) NextGap() pmaGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return pmaGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg pmaIterator) PrevNonEmpty() (pmaIterator, pmaGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return pmaIterator{}, gap + } + return gap.PrevSegment(), pmaGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg pmaIterator) NextNonEmpty() (pmaIterator, pmaGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return pmaIterator{}, gap + } + return gap.NextSegment(), pmaGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type pmaGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *pmanode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap pmaGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap pmaGapIterator) Range() __generics_imported0.AddrRange { + return __generics_imported0.AddrRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap pmaGapIterator) Start() __generics_imported0.Addr { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return pmaSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap pmaGapIterator) End() __generics_imported0.Addr { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return pmaSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap pmaGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap pmaGapIterator) PrevSegment() pmaIterator { + return pmasegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap pmaGapIterator) NextSegment() pmaIterator { + return pmasegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap pmaGapIterator) PrevGap() pmaGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return pmaGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap pmaGapIterator) NextGap() pmaGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return pmaGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func pmasegmentBeforePosition(n *pmanode, i int) pmaIterator { + for i == 0 { + if n.parent == nil { + return pmaIterator{} + } + n, i = n.parent, n.parentIndex + } + return pmaIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func pmasegmentAfterPosition(n *pmanode, i int) pmaIterator { + for i == n.nrSegments { + if n.parent == nil { + return pmaIterator{} + } + n, i = n.parent, n.parentIndex + } + return pmaIterator{n, i} +} + +func pmazeroValueSlice(slice []pma) { + + for i := range slice { + pmaSetFunctions{}.ClearValue(&slice[i]) + } +} + +func pmazeroNodeSlice(slice []*pmanode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *pmaSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *pmanode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *pmanode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type pmaSegmentDataSlices struct { + Start []__generics_imported0.Addr + End []__generics_imported0.Addr + Values []pma +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *pmaSet) ExportSortedSlices() *pmaSegmentDataSlices { + var sds pmaSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *pmaSet) ImportSortedSlices(sds *pmaSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.AddrRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *pmaSet) saveRoot() *pmaSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *pmaSet) loadRoot(sds *pmaSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/mm/vma_set.go b/pkg/sentry/mm/vma_set.go new file mode 100755 index 000000000..c3ef24f34 --- /dev/null +++ b/pkg/sentry/mm/vma_set.go @@ -0,0 +1,1274 @@ +package mm + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/usermem" +) + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + vmaminDegree = 8 + + vmamaxDegree = 2 * vmaminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type vmaSet struct { + root vmanode `state:".(*vmaSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *vmaSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *vmaSet) IsEmptyRange(r __generics_imported0.AddrRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *vmaSet) Span() __generics_imported0.Addr { + var sz __generics_imported0.Addr + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *vmaSet) SpanRange(r __generics_imported0.AddrRange) __generics_imported0.Addr { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz __generics_imported0.Addr + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *vmaSet) FirstSegment() vmaIterator { + if s.root.nrSegments == 0 { + return vmaIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *vmaSet) LastSegment() vmaIterator { + if s.root.nrSegments == 0 { + return vmaIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *vmaSet) FirstGap() vmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return vmaGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *vmaSet) LastGap() vmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return vmaGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *vmaSet) Find(key __generics_imported0.Addr) (vmaIterator, vmaGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return vmaIterator{n, i}, vmaGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return vmaIterator{}, vmaGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *vmaSet) FindSegment(key __generics_imported0.Addr) vmaIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *vmaSet) LowerBoundSegment(min __generics_imported0.Addr) vmaIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *vmaSet) UpperBoundSegment(max __generics_imported0.Addr) vmaIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *vmaSet) FindGap(key __generics_imported0.Addr) vmaGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *vmaSet) LowerBoundGap(min __generics_imported0.Addr) vmaGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *vmaSet) UpperBoundGap(max __generics_imported0.Addr) vmaGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *vmaSet) Add(r __generics_imported0.AddrRange, val vma) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *vmaSet) AddWithoutMerging(r __generics_imported0.AddrRange, val vma) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *vmaSet) Insert(gap vmaGapIterator, r __generics_imported0.AddrRange, val vma) vmaIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (vmaSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (vmaSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (vmaSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *vmaSet) InsertWithoutMerging(gap vmaGapIterator, r __generics_imported0.AddrRange, val vma) vmaIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *vmaSet) InsertWithoutMergingUnchecked(gap vmaGapIterator, r __generics_imported0.AddrRange, val vma) vmaIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return vmaIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *vmaSet) Remove(seg vmaIterator) vmaGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + vmaSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(vmaGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *vmaSet) RemoveAll() { + s.root = vmanode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *vmaSet) RemoveRange(r __generics_imported0.AddrRange) vmaGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *vmaSet) Merge(first, second vmaIterator) vmaIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *vmaSet) MergeUnchecked(first, second vmaIterator) vmaIterator { + if first.End() == second.Start() { + if mval, ok := (vmaSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return vmaIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *vmaSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *vmaSet) MergeRange(r __generics_imported0.AddrRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *vmaSet) MergeAdjacent(r __generics_imported0.AddrRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *vmaSet) Split(seg vmaIterator, split __generics_imported0.Addr) (vmaIterator, vmaIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *vmaSet) SplitUnchecked(seg vmaIterator, split __generics_imported0.Addr) (vmaIterator, vmaIterator) { + val1, val2 := (vmaSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.AddrRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *vmaSet) SplitAt(split __generics_imported0.Addr) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *vmaSet) Isolate(seg vmaIterator, r __generics_imported0.AddrRange) vmaIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *vmaSet) ApplyContiguous(r __generics_imported0.AddrRange, fn func(seg vmaIterator)) vmaGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return vmaGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return vmaGapIterator{} + } + } +} + +// +stateify savable +type vmanode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *vmanode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [vmamaxDegree - 1]__generics_imported0.AddrRange + values [vmamaxDegree - 1]vma + children [vmamaxDegree]*vmanode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *vmanode) firstSegment() vmaIterator { + for n.hasChildren { + n = n.children[0] + } + return vmaIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *vmanode) lastSegment() vmaIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return vmaIterator{n, n.nrSegments - 1} +} + +func (n *vmanode) prevSibling() *vmanode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *vmanode) nextSibling() *vmanode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *vmanode) rebalanceBeforeInsert(gap vmaGapIterator) vmaGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < vmamaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &vmanode{ + nrSegments: vmaminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &vmanode{ + nrSegments: vmaminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:vmaminDegree-1], n.keys[:vmaminDegree-1]) + copy(left.values[:vmaminDegree-1], n.values[:vmaminDegree-1]) + copy(right.keys[:vmaminDegree-1], n.keys[vmaminDegree:]) + copy(right.values[:vmaminDegree-1], n.values[vmaminDegree:]) + n.keys[0], n.values[0] = n.keys[vmaminDegree-1], n.values[vmaminDegree-1] + vmazeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:vmaminDegree], n.children[:vmaminDegree]) + copy(right.children[:vmaminDegree], n.children[vmaminDegree:]) + vmazeroNodeSlice(n.children[2:]) + for i := 0; i < vmaminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < vmaminDegree { + return vmaGapIterator{left, gap.index} + } + return vmaGapIterator{right, gap.index - vmaminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[vmaminDegree-1], n.values[vmaminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &vmanode{ + nrSegments: vmaminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:vmaminDegree-1], n.keys[vmaminDegree:]) + copy(sibling.values[:vmaminDegree-1], n.values[vmaminDegree:]) + vmazeroValueSlice(n.values[vmaminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:vmaminDegree], n.children[vmaminDegree:]) + vmazeroNodeSlice(n.children[vmaminDegree:]) + for i := 0; i < vmaminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = vmaminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < vmaminDegree { + return gap + } + return vmaGapIterator{sibling, gap.index - vmaminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *vmanode) rebalanceAfterRemove(gap vmaGapIterator) vmaGapIterator { + for { + if n.nrSegments >= vmaminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= vmaminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + vmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return vmaGapIterator{n, 0} + } + if gap.node == n { + return vmaGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= vmaminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + vmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return vmaGapIterator{n, n.nrSegments} + } + return vmaGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return vmaGapIterator{p, gap.index} + } + if gap.node == right { + return vmaGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *vmanode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = vmaGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + vmaSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type vmaIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *vmanode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg vmaIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg vmaIterator) Range() __generics_imported0.AddrRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg vmaIterator) Start() __generics_imported0.Addr { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg vmaIterator) End() __generics_imported0.Addr { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg vmaIterator) SetRangeUnchecked(r __generics_imported0.AddrRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg vmaIterator) SetRange(r __generics_imported0.AddrRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg vmaIterator) SetStartUnchecked(start __generics_imported0.Addr) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg vmaIterator) SetStart(start __generics_imported0.Addr) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg vmaIterator) SetEndUnchecked(end __generics_imported0.Addr) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg vmaIterator) SetEnd(end __generics_imported0.Addr) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg vmaIterator) Value() vma { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg vmaIterator) ValuePtr() *vma { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg vmaIterator) SetValue(val vma) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg vmaIterator) PrevSegment() vmaIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return vmaIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return vmaIterator{} + } + return vmasegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg vmaIterator) NextSegment() vmaIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return vmaIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return vmaIterator{} + } + return vmasegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg vmaIterator) PrevGap() vmaGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return vmaGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg vmaIterator) NextGap() vmaGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return vmaGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg vmaIterator) PrevNonEmpty() (vmaIterator, vmaGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return vmaIterator{}, gap + } + return gap.PrevSegment(), vmaGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg vmaIterator) NextNonEmpty() (vmaIterator, vmaGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return vmaIterator{}, gap + } + return gap.NextSegment(), vmaGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type vmaGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *vmanode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap vmaGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap vmaGapIterator) Range() __generics_imported0.AddrRange { + return __generics_imported0.AddrRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap vmaGapIterator) Start() __generics_imported0.Addr { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return vmaSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap vmaGapIterator) End() __generics_imported0.Addr { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return vmaSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap vmaGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap vmaGapIterator) PrevSegment() vmaIterator { + return vmasegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap vmaGapIterator) NextSegment() vmaIterator { + return vmasegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap vmaGapIterator) PrevGap() vmaGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return vmaGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap vmaGapIterator) NextGap() vmaGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return vmaGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func vmasegmentBeforePosition(n *vmanode, i int) vmaIterator { + for i == 0 { + if n.parent == nil { + return vmaIterator{} + } + n, i = n.parent, n.parentIndex + } + return vmaIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func vmasegmentAfterPosition(n *vmanode, i int) vmaIterator { + for i == n.nrSegments { + if n.parent == nil { + return vmaIterator{} + } + n, i = n.parent, n.parentIndex + } + return vmaIterator{n, i} +} + +func vmazeroValueSlice(slice []vma) { + + for i := range slice { + vmaSetFunctions{}.ClearValue(&slice[i]) + } +} + +func vmazeroNodeSlice(slice []*vmanode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *vmaSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *vmanode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *vmanode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type vmaSegmentDataSlices struct { + Start []__generics_imported0.Addr + End []__generics_imported0.Addr + Values []vma +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *vmaSet) ExportSortedSlices() *vmaSegmentDataSlices { + var sds vmaSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *vmaSet) ImportSortedSlices(sds *vmaSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.AddrRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *vmaSet) saveRoot() *vmaSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *vmaSet) loadRoot(sds *vmaSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD deleted file mode 100644 index 858f895f2..000000000 --- a/pkg/sentry/pgalloc/BUILD +++ /dev/null @@ -1,85 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "evictable_range", - out = "evictable_range.go", - package = "pgalloc", - prefix = "Evictable", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - -go_template_instance( - name = "evictable_range_set", - out = "evictable_range_set.go", - package = "pgalloc", - prefix = "evictableRange", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "EvictableRange", - "Value": "evictableRangeSetValue", - "Functions": "evictableRangeSetFunctions", - }, -) - -go_template_instance( - name = "usage_set", - out = "usage_set.go", - consts = { - "minDegree": "10", - }, - imports = { - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", - }, - package = "pgalloc", - prefix = "usage", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "platform.FileRange", - "Value": "usageInfo", - "Functions": "usageSetFunctions", - }, -) - -go_library( - name = "pgalloc", - srcs = [ - "context.go", - "evictable_range.go", - "evictable_range_set.go", - "pgalloc.go", - "pgalloc_unsafe.go", - "save_restore.go", - "usage_set.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/pgalloc", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/log", - "//pkg/memutil", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/hostmm", - "//pkg/sentry/platform", - "//pkg/sentry/safemem", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/state", - "//pkg/syserror", - ], -) - -go_test( - name = "pgalloc_test", - size = "small", - srcs = ["pgalloc_test.go"], - embed = [":pgalloc"], - deps = ["//pkg/sentry/usermem"], -) diff --git a/pkg/sentry/pgalloc/evictable_range.go b/pkg/sentry/pgalloc/evictable_range.go new file mode 100755 index 000000000..10ce2ff44 --- /dev/null +++ b/pkg/sentry/pgalloc/evictable_range.go @@ -0,0 +1,62 @@ +package pgalloc + +// A Range represents a contiguous range of T. +// +// +stateify savable +type EvictableRange struct { + // Start is the inclusive start of the range. + Start uint64 + + // End is the exclusive end of the range. + End uint64 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +func (r EvictableRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +func (r EvictableRange) Length() uint64 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +func (r EvictableRange) Contains(x uint64) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +func (r EvictableRange) Overlaps(r2 EvictableRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +func (r EvictableRange) IsSupersetOf(r2 EvictableRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +func (r EvictableRange) Intersect(r2 EvictableRange) EvictableRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +func (r EvictableRange) CanSplitAt(x uint64) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/pgalloc/evictable_range_set.go b/pkg/sentry/pgalloc/evictable_range_set.go new file mode 100755 index 000000000..6fbd02434 --- /dev/null +++ b/pkg/sentry/pgalloc/evictable_range_set.go @@ -0,0 +1,1270 @@ +package pgalloc + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + evictableRangeminDegree = 3 + + evictableRangemaxDegree = 2 * evictableRangeminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type evictableRangeSet struct { + root evictableRangenode `state:".(*evictableRangeSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *evictableRangeSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *evictableRangeSet) IsEmptyRange(r EvictableRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *evictableRangeSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *evictableRangeSet) SpanRange(r EvictableRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *evictableRangeSet) FirstSegment() evictableRangeIterator { + if s.root.nrSegments == 0 { + return evictableRangeIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *evictableRangeSet) LastSegment() evictableRangeIterator { + if s.root.nrSegments == 0 { + return evictableRangeIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *evictableRangeSet) FirstGap() evictableRangeGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return evictableRangeGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *evictableRangeSet) LastGap() evictableRangeGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return evictableRangeGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *evictableRangeSet) Find(key uint64) (evictableRangeIterator, evictableRangeGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return evictableRangeIterator{n, i}, evictableRangeGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return evictableRangeIterator{}, evictableRangeGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *evictableRangeSet) FindSegment(key uint64) evictableRangeIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *evictableRangeSet) LowerBoundSegment(min uint64) evictableRangeIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *evictableRangeSet) UpperBoundSegment(max uint64) evictableRangeIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *evictableRangeSet) FindGap(key uint64) evictableRangeGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *evictableRangeSet) LowerBoundGap(min uint64) evictableRangeGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *evictableRangeSet) UpperBoundGap(max uint64) evictableRangeGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *evictableRangeSet) Add(r EvictableRange, val evictableRangeSetValue) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *evictableRangeSet) AddWithoutMerging(r EvictableRange, val evictableRangeSetValue) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *evictableRangeSet) Insert(gap evictableRangeGapIterator, r EvictableRange, val evictableRangeSetValue) evictableRangeIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (evictableRangeSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (evictableRangeSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (evictableRangeSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *evictableRangeSet) InsertWithoutMerging(gap evictableRangeGapIterator, r EvictableRange, val evictableRangeSetValue) evictableRangeIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *evictableRangeSet) InsertWithoutMergingUnchecked(gap evictableRangeGapIterator, r EvictableRange, val evictableRangeSetValue) evictableRangeIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return evictableRangeIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *evictableRangeSet) Remove(seg evictableRangeIterator) evictableRangeGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + evictableRangeSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(evictableRangeGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *evictableRangeSet) RemoveAll() { + s.root = evictableRangenode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *evictableRangeSet) RemoveRange(r EvictableRange) evictableRangeGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *evictableRangeSet) Merge(first, second evictableRangeIterator) evictableRangeIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *evictableRangeSet) MergeUnchecked(first, second evictableRangeIterator) evictableRangeIterator { + if first.End() == second.Start() { + if mval, ok := (evictableRangeSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return evictableRangeIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *evictableRangeSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *evictableRangeSet) MergeRange(r EvictableRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *evictableRangeSet) MergeAdjacent(r EvictableRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *evictableRangeSet) Split(seg evictableRangeIterator, split uint64) (evictableRangeIterator, evictableRangeIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *evictableRangeSet) SplitUnchecked(seg evictableRangeIterator, split uint64) (evictableRangeIterator, evictableRangeIterator) { + val1, val2 := (evictableRangeSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), EvictableRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *evictableRangeSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *evictableRangeSet) Isolate(seg evictableRangeIterator, r EvictableRange) evictableRangeIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *evictableRangeSet) ApplyContiguous(r EvictableRange, fn func(seg evictableRangeIterator)) evictableRangeGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return evictableRangeGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return evictableRangeGapIterator{} + } + } +} + +// +stateify savable +type evictableRangenode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *evictableRangenode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [evictableRangemaxDegree - 1]EvictableRange + values [evictableRangemaxDegree - 1]evictableRangeSetValue + children [evictableRangemaxDegree]*evictableRangenode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *evictableRangenode) firstSegment() evictableRangeIterator { + for n.hasChildren { + n = n.children[0] + } + return evictableRangeIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *evictableRangenode) lastSegment() evictableRangeIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return evictableRangeIterator{n, n.nrSegments - 1} +} + +func (n *evictableRangenode) prevSibling() *evictableRangenode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *evictableRangenode) nextSibling() *evictableRangenode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *evictableRangenode) rebalanceBeforeInsert(gap evictableRangeGapIterator) evictableRangeGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < evictableRangemaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &evictableRangenode{ + nrSegments: evictableRangeminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &evictableRangenode{ + nrSegments: evictableRangeminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:evictableRangeminDegree-1], n.keys[:evictableRangeminDegree-1]) + copy(left.values[:evictableRangeminDegree-1], n.values[:evictableRangeminDegree-1]) + copy(right.keys[:evictableRangeminDegree-1], n.keys[evictableRangeminDegree:]) + copy(right.values[:evictableRangeminDegree-1], n.values[evictableRangeminDegree:]) + n.keys[0], n.values[0] = n.keys[evictableRangeminDegree-1], n.values[evictableRangeminDegree-1] + evictableRangezeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:evictableRangeminDegree], n.children[:evictableRangeminDegree]) + copy(right.children[:evictableRangeminDegree], n.children[evictableRangeminDegree:]) + evictableRangezeroNodeSlice(n.children[2:]) + for i := 0; i < evictableRangeminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < evictableRangeminDegree { + return evictableRangeGapIterator{left, gap.index} + } + return evictableRangeGapIterator{right, gap.index - evictableRangeminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[evictableRangeminDegree-1], n.values[evictableRangeminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &evictableRangenode{ + nrSegments: evictableRangeminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:evictableRangeminDegree-1], n.keys[evictableRangeminDegree:]) + copy(sibling.values[:evictableRangeminDegree-1], n.values[evictableRangeminDegree:]) + evictableRangezeroValueSlice(n.values[evictableRangeminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:evictableRangeminDegree], n.children[evictableRangeminDegree:]) + evictableRangezeroNodeSlice(n.children[evictableRangeminDegree:]) + for i := 0; i < evictableRangeminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = evictableRangeminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < evictableRangeminDegree { + return gap + } + return evictableRangeGapIterator{sibling, gap.index - evictableRangeminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *evictableRangenode) rebalanceAfterRemove(gap evictableRangeGapIterator) evictableRangeGapIterator { + for { + if n.nrSegments >= evictableRangeminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= evictableRangeminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + evictableRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return evictableRangeGapIterator{n, 0} + } + if gap.node == n { + return evictableRangeGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= evictableRangeminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + evictableRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return evictableRangeGapIterator{n, n.nrSegments} + } + return evictableRangeGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return evictableRangeGapIterator{p, gap.index} + } + if gap.node == right { + return evictableRangeGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *evictableRangenode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = evictableRangeGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + evictableRangeSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type evictableRangeIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *evictableRangenode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg evictableRangeIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg evictableRangeIterator) Range() EvictableRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg evictableRangeIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg evictableRangeIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg evictableRangeIterator) SetRangeUnchecked(r EvictableRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg evictableRangeIterator) SetRange(r EvictableRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg evictableRangeIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg evictableRangeIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg evictableRangeIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg evictableRangeIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg evictableRangeIterator) Value() evictableRangeSetValue { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg evictableRangeIterator) ValuePtr() *evictableRangeSetValue { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg evictableRangeIterator) SetValue(val evictableRangeSetValue) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg evictableRangeIterator) PrevSegment() evictableRangeIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return evictableRangeIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return evictableRangeIterator{} + } + return evictableRangesegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg evictableRangeIterator) NextSegment() evictableRangeIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return evictableRangeIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return evictableRangeIterator{} + } + return evictableRangesegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg evictableRangeIterator) PrevGap() evictableRangeGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return evictableRangeGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg evictableRangeIterator) NextGap() evictableRangeGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return evictableRangeGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg evictableRangeIterator) PrevNonEmpty() (evictableRangeIterator, evictableRangeGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return evictableRangeIterator{}, gap + } + return gap.PrevSegment(), evictableRangeGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg evictableRangeIterator) NextNonEmpty() (evictableRangeIterator, evictableRangeGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return evictableRangeIterator{}, gap + } + return gap.NextSegment(), evictableRangeGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type evictableRangeGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *evictableRangenode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap evictableRangeGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap evictableRangeGapIterator) Range() EvictableRange { + return EvictableRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap evictableRangeGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return evictableRangeSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap evictableRangeGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return evictableRangeSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap evictableRangeGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap evictableRangeGapIterator) PrevSegment() evictableRangeIterator { + return evictableRangesegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap evictableRangeGapIterator) NextSegment() evictableRangeIterator { + return evictableRangesegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap evictableRangeGapIterator) PrevGap() evictableRangeGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return evictableRangeGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap evictableRangeGapIterator) NextGap() evictableRangeGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return evictableRangeGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func evictableRangesegmentBeforePosition(n *evictableRangenode, i int) evictableRangeIterator { + for i == 0 { + if n.parent == nil { + return evictableRangeIterator{} + } + n, i = n.parent, n.parentIndex + } + return evictableRangeIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func evictableRangesegmentAfterPosition(n *evictableRangenode, i int) evictableRangeIterator { + for i == n.nrSegments { + if n.parent == nil { + return evictableRangeIterator{} + } + n, i = n.parent, n.parentIndex + } + return evictableRangeIterator{n, i} +} + +func evictableRangezeroValueSlice(slice []evictableRangeSetValue) { + + for i := range slice { + evictableRangeSetFunctions{}.ClearValue(&slice[i]) + } +} + +func evictableRangezeroNodeSlice(slice []*evictableRangenode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *evictableRangeSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *evictableRangenode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *evictableRangenode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type evictableRangeSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []evictableRangeSetValue +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *evictableRangeSet) ExportSortedSlices() *evictableRangeSegmentDataSlices { + var sds evictableRangeSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *evictableRangeSet) ImportSortedSlices(sds *evictableRangeSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := EvictableRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *evictableRangeSet) saveRoot() *evictableRangeSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *evictableRangeSet) loadRoot(sds *evictableRangeSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/pgalloc/pgalloc_state_autogen.go b/pkg/sentry/pgalloc/pgalloc_state_autogen.go new file mode 100755 index 000000000..1cfacc241 --- /dev/null +++ b/pkg/sentry/pgalloc/pgalloc_state_autogen.go @@ -0,0 +1,146 @@ +// automatically generated by stateify. + +package pgalloc + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *EvictableRange) beforeSave() {} +func (x *EvictableRange) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) +} + +func (x *EvictableRange) afterLoad() {} +func (x *EvictableRange) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) +} + +func (x *evictableRangeSet) beforeSave() {} +func (x *evictableRangeSet) save(m state.Map) { + x.beforeSave() + var root *evictableRangeSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *evictableRangeSet) afterLoad() {} +func (x *evictableRangeSet) load(m state.Map) { + m.LoadValue("root", new(*evictableRangeSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*evictableRangeSegmentDataSlices)) }) +} + +func (x *evictableRangenode) beforeSave() {} +func (x *evictableRangenode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *evictableRangenode) afterLoad() {} +func (x *evictableRangenode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *evictableRangeSegmentDataSlices) beforeSave() {} +func (x *evictableRangeSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *evictableRangeSegmentDataSlices) afterLoad() {} +func (x *evictableRangeSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func (x *usageInfo) beforeSave() {} +func (x *usageInfo) save(m state.Map) { + x.beforeSave() + m.Save("kind", &x.kind) + m.Save("knownCommitted", &x.knownCommitted) + m.Save("refs", &x.refs) +} + +func (x *usageInfo) afterLoad() {} +func (x *usageInfo) load(m state.Map) { + m.Load("kind", &x.kind) + m.Load("knownCommitted", &x.knownCommitted) + m.Load("refs", &x.refs) +} + +func (x *usageSet) beforeSave() {} +func (x *usageSet) save(m state.Map) { + x.beforeSave() + var root *usageSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *usageSet) afterLoad() {} +func (x *usageSet) load(m state.Map) { + m.LoadValue("root", new(*usageSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*usageSegmentDataSlices)) }) +} + +func (x *usagenode) beforeSave() {} +func (x *usagenode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *usagenode) afterLoad() {} +func (x *usagenode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *usageSegmentDataSlices) beforeSave() {} +func (x *usageSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *usageSegmentDataSlices) afterLoad() {} +func (x *usageSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func init() { + state.Register("pgalloc.EvictableRange", (*EvictableRange)(nil), state.Fns{Save: (*EvictableRange).save, Load: (*EvictableRange).load}) + state.Register("pgalloc.evictableRangeSet", (*evictableRangeSet)(nil), state.Fns{Save: (*evictableRangeSet).save, Load: (*evictableRangeSet).load}) + state.Register("pgalloc.evictableRangenode", (*evictableRangenode)(nil), state.Fns{Save: (*evictableRangenode).save, Load: (*evictableRangenode).load}) + state.Register("pgalloc.evictableRangeSegmentDataSlices", (*evictableRangeSegmentDataSlices)(nil), state.Fns{Save: (*evictableRangeSegmentDataSlices).save, Load: (*evictableRangeSegmentDataSlices).load}) + state.Register("pgalloc.usageInfo", (*usageInfo)(nil), state.Fns{Save: (*usageInfo).save, Load: (*usageInfo).load}) + state.Register("pgalloc.usageSet", (*usageSet)(nil), state.Fns{Save: (*usageSet).save, Load: (*usageSet).load}) + state.Register("pgalloc.usagenode", (*usagenode)(nil), state.Fns{Save: (*usagenode).save, Load: (*usagenode).load}) + state.Register("pgalloc.usageSegmentDataSlices", (*usageSegmentDataSlices)(nil), state.Fns{Save: (*usageSegmentDataSlices).save, Load: (*usageSegmentDataSlices).load}) +} diff --git a/pkg/sentry/pgalloc/pgalloc_test.go b/pkg/sentry/pgalloc/pgalloc_test.go deleted file mode 100644 index 428e6a859..000000000 --- a/pkg/sentry/pgalloc/pgalloc_test.go +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pgalloc - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -const ( - page = usermem.PageSize - hugepage = usermem.HugePageSize -) - -func TestFindUnallocatedRange(t *testing.T) { - for _, test := range []struct { - desc string - usage *usageSegmentDataSlices - start uint64 - length uint64 - alignment uint64 - unallocated uint64 - minUnallocated uint64 - }{ - { - desc: "Initial allocation succeeds", - usage: &usageSegmentDataSlices{}, - start: 0, - length: page, - alignment: page, - unallocated: 0, - minUnallocated: 0, - }, - { - desc: "Allocation begins at start of file", - usage: &usageSegmentDataSlices{ - Start: []uint64{page}, - End: []uint64{2 * page}, - Values: []usageInfo{{refs: 1}}, - }, - start: 0, - length: page, - alignment: page, - unallocated: 0, - minUnallocated: 0, - }, - { - desc: "In-use frames are not allocatable", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, page}, - End: []uint64{page, 2 * page}, - Values: []usageInfo{{refs: 1}, {refs: 2}}, - }, - start: 0, - length: page, - alignment: page, - unallocated: 2 * page, - minUnallocated: 2 * page, - }, - { - desc: "Reclaimable frames are not allocatable", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, page, 2 * page}, - End: []uint64{page, 2 * page, 3 * page}, - Values: []usageInfo{{refs: 1}, {refs: 0}, {refs: 1}}, - }, - start: 0, - length: page, - alignment: page, - unallocated: 3 * page, - minUnallocated: 3 * page, - }, - { - desc: "Gaps between in-use frames are allocatable", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, 2 * page}, - End: []uint64{page, 3 * page}, - Values: []usageInfo{{refs: 1}, {refs: 1}}, - }, - start: 0, - length: page, - alignment: page, - unallocated: page, - minUnallocated: page, - }, - { - desc: "Inadequately-sized gaps are rejected", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, 2 * page}, - End: []uint64{page, 3 * page}, - Values: []usageInfo{{refs: 1}, {refs: 1}}, - }, - start: 0, - length: 2 * page, - alignment: page, - unallocated: 3 * page, - minUnallocated: page, - }, - { - desc: "Hugepage alignment is honored", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, hugepage + page}, - // Hugepage-sized gap here that shouldn't be allocated from - // since it's incorrectly aligned. - End: []uint64{page, hugepage + 2*page}, - Values: []usageInfo{{refs: 1}, {refs: 1}}, - }, - start: 0, - length: hugepage, - alignment: hugepage, - unallocated: 2 * hugepage, - minUnallocated: page, - }, - { - desc: "Pages before start ignored", - usage: &usageSegmentDataSlices{ - Start: []uint64{page, 3 * page}, - End: []uint64{2 * page, 4 * page}, - Values: []usageInfo{{refs: 1}, {refs: 2}}, - }, - start: page, - length: page, - alignment: page, - unallocated: 2 * page, - minUnallocated: 2 * page, - }, - { - desc: "start may be in the middle of segment", - usage: &usageSegmentDataSlices{ - Start: []uint64{0, 3 * page}, - End: []uint64{2 * page, 4 * page}, - Values: []usageInfo{{refs: 1}, {refs: 2}}, - }, - start: page, - length: page, - alignment: page, - unallocated: 2 * page, - minUnallocated: 2 * page, - }, - } { - t.Run(test.desc, func(t *testing.T) { - var usage usageSet - if err := usage.ImportSortedSlices(test.usage); err != nil { - t.Fatalf("Failed to initialize usage from %v: %v", test.usage, err) - } - unallocated, minUnallocated := findUnallocatedRange(&usage, test.start, test.length, test.alignment) - if unallocated != test.unallocated { - t.Errorf("findUnallocatedRange(%v, %x, %x, %x): got unallocated %x, wanted %x", test.usage, test.start, test.length, test.alignment, unallocated, test.unallocated) - } - if minUnallocated != test.minUnallocated { - t.Errorf("findUnallocatedRange(%v, %x, %x, %x): got minUnallocated %x, wanted %x", test.usage, test.start, test.length, test.alignment, minUnallocated, test.minUnallocated) - } - }) - } -} diff --git a/pkg/sentry/pgalloc/usage_set.go b/pkg/sentry/pgalloc/usage_set.go new file mode 100755 index 000000000..37b9235ca --- /dev/null +++ b/pkg/sentry/pgalloc/usage_set.go @@ -0,0 +1,1274 @@ +package pgalloc + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/platform" +) + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + usageminDegree = 10 + + usagemaxDegree = 2 * usageminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type usageSet struct { + root usagenode `state:".(*usageSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *usageSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *usageSet) IsEmptyRange(r __generics_imported0.FileRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *usageSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *usageSet) SpanRange(r __generics_imported0.FileRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *usageSet) FirstSegment() usageIterator { + if s.root.nrSegments == 0 { + return usageIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *usageSet) LastSegment() usageIterator { + if s.root.nrSegments == 0 { + return usageIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *usageSet) FirstGap() usageGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return usageGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *usageSet) LastGap() usageGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return usageGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *usageSet) Find(key uint64) (usageIterator, usageGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return usageIterator{n, i}, usageGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return usageIterator{}, usageGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *usageSet) FindSegment(key uint64) usageIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *usageSet) LowerBoundSegment(min uint64) usageIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *usageSet) UpperBoundSegment(max uint64) usageIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *usageSet) FindGap(key uint64) usageGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *usageSet) LowerBoundGap(min uint64) usageGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *usageSet) UpperBoundGap(max uint64) usageGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *usageSet) Add(r __generics_imported0.FileRange, val usageInfo) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *usageSet) AddWithoutMerging(r __generics_imported0.FileRange, val usageInfo) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *usageSet) Insert(gap usageGapIterator, r __generics_imported0.FileRange, val usageInfo) usageIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (usageSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (usageSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (usageSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *usageSet) InsertWithoutMerging(gap usageGapIterator, r __generics_imported0.FileRange, val usageInfo) usageIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *usageSet) InsertWithoutMergingUnchecked(gap usageGapIterator, r __generics_imported0.FileRange, val usageInfo) usageIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return usageIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *usageSet) Remove(seg usageIterator) usageGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + usageSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(usageGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *usageSet) RemoveAll() { + s.root = usagenode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *usageSet) RemoveRange(r __generics_imported0.FileRange) usageGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *usageSet) Merge(first, second usageIterator) usageIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *usageSet) MergeUnchecked(first, second usageIterator) usageIterator { + if first.End() == second.Start() { + if mval, ok := (usageSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return usageIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *usageSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *usageSet) MergeRange(r __generics_imported0.FileRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *usageSet) MergeAdjacent(r __generics_imported0.FileRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *usageSet) Split(seg usageIterator, split uint64) (usageIterator, usageIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *usageSet) SplitUnchecked(seg usageIterator, split uint64) (usageIterator, usageIterator) { + val1, val2 := (usageSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.FileRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *usageSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *usageSet) Isolate(seg usageIterator, r __generics_imported0.FileRange) usageIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *usageSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg usageIterator)) usageGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return usageGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return usageGapIterator{} + } + } +} + +// +stateify savable +type usagenode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *usagenode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [usagemaxDegree - 1]__generics_imported0.FileRange + values [usagemaxDegree - 1]usageInfo + children [usagemaxDegree]*usagenode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *usagenode) firstSegment() usageIterator { + for n.hasChildren { + n = n.children[0] + } + return usageIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *usagenode) lastSegment() usageIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return usageIterator{n, n.nrSegments - 1} +} + +func (n *usagenode) prevSibling() *usagenode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *usagenode) nextSibling() *usagenode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *usagenode) rebalanceBeforeInsert(gap usageGapIterator) usageGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < usagemaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &usagenode{ + nrSegments: usageminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &usagenode{ + nrSegments: usageminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:usageminDegree-1], n.keys[:usageminDegree-1]) + copy(left.values[:usageminDegree-1], n.values[:usageminDegree-1]) + copy(right.keys[:usageminDegree-1], n.keys[usageminDegree:]) + copy(right.values[:usageminDegree-1], n.values[usageminDegree:]) + n.keys[0], n.values[0] = n.keys[usageminDegree-1], n.values[usageminDegree-1] + usagezeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:usageminDegree], n.children[:usageminDegree]) + copy(right.children[:usageminDegree], n.children[usageminDegree:]) + usagezeroNodeSlice(n.children[2:]) + for i := 0; i < usageminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < usageminDegree { + return usageGapIterator{left, gap.index} + } + return usageGapIterator{right, gap.index - usageminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[usageminDegree-1], n.values[usageminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &usagenode{ + nrSegments: usageminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:usageminDegree-1], n.keys[usageminDegree:]) + copy(sibling.values[:usageminDegree-1], n.values[usageminDegree:]) + usagezeroValueSlice(n.values[usageminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:usageminDegree], n.children[usageminDegree:]) + usagezeroNodeSlice(n.children[usageminDegree:]) + for i := 0; i < usageminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = usageminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < usageminDegree { + return gap + } + return usageGapIterator{sibling, gap.index - usageminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *usagenode) rebalanceAfterRemove(gap usageGapIterator) usageGapIterator { + for { + if n.nrSegments >= usageminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= usageminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + usageSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return usageGapIterator{n, 0} + } + if gap.node == n { + return usageGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= usageminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + usageSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return usageGapIterator{n, n.nrSegments} + } + return usageGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return usageGapIterator{p, gap.index} + } + if gap.node == right { + return usageGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *usagenode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = usageGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + usageSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type usageIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *usagenode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg usageIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg usageIterator) Range() __generics_imported0.FileRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg usageIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg usageIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg usageIterator) SetRangeUnchecked(r __generics_imported0.FileRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg usageIterator) SetRange(r __generics_imported0.FileRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg usageIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg usageIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg usageIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg usageIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg usageIterator) Value() usageInfo { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg usageIterator) ValuePtr() *usageInfo { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg usageIterator) SetValue(val usageInfo) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg usageIterator) PrevSegment() usageIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return usageIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return usageIterator{} + } + return usagesegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg usageIterator) NextSegment() usageIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return usageIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return usageIterator{} + } + return usagesegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg usageIterator) PrevGap() usageGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return usageGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg usageIterator) NextGap() usageGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return usageGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg usageIterator) PrevNonEmpty() (usageIterator, usageGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return usageIterator{}, gap + } + return gap.PrevSegment(), usageGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg usageIterator) NextNonEmpty() (usageIterator, usageGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return usageIterator{}, gap + } + return gap.NextSegment(), usageGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type usageGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *usagenode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap usageGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap usageGapIterator) Range() __generics_imported0.FileRange { + return __generics_imported0.FileRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap usageGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return usageSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap usageGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return usageSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap usageGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap usageGapIterator) PrevSegment() usageIterator { + return usagesegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap usageGapIterator) NextSegment() usageIterator { + return usagesegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap usageGapIterator) PrevGap() usageGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return usageGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap usageGapIterator) NextGap() usageGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return usageGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func usagesegmentBeforePosition(n *usagenode, i int) usageIterator { + for i == 0 { + if n.parent == nil { + return usageIterator{} + } + n, i = n.parent, n.parentIndex + } + return usageIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func usagesegmentAfterPosition(n *usagenode, i int) usageIterator { + for i == n.nrSegments { + if n.parent == nil { + return usageIterator{} + } + n, i = n.parent, n.parentIndex + } + return usageIterator{n, i} +} + +func usagezeroValueSlice(slice []usageInfo) { + + for i := range slice { + usageSetFunctions{}.ClearValue(&slice[i]) + } +} + +func usagezeroNodeSlice(slice []*usagenode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *usageSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *usagenode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *usagenode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type usageSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []usageInfo +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *usageSet) ExportSortedSlices() *usageSegmentDataSlices { + var sds usageSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: s must be empty. sds must represent a valid set (the segments +// in sds must have valid lengths that do not overlap). The segments in sds +// must be sorted in ascending key order. +func (s *usageSet) ImportSortedSlices(sds *usageSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.FileRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *usageSet) saveRoot() *usageSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *usageSet) loadRoot(sds *usageSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD deleted file mode 100644 index 9aa6ec507..000000000 --- a/pkg/sentry/platform/BUILD +++ /dev/null @@ -1,40 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") - -go_template_instance( - name = "file_range", - out = "file_range.go", - package = "platform", - prefix = "File", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - -go_library( - name = "platform", - srcs = [ - "context.go", - "file_range.go", - "mmap_min_addr.go", - "platform.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/atomicbitops", - "//pkg/log", - "//pkg/seccomp", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/safemem", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/platform/file_range.go b/pkg/sentry/platform/file_range.go new file mode 100755 index 000000000..685d360e3 --- /dev/null +++ b/pkg/sentry/platform/file_range.go @@ -0,0 +1,62 @@ +package platform + +// A Range represents a contiguous range of T. +// +// +stateify savable +type FileRange struct { + // Start is the inclusive start of the range. + Start uint64 + + // End is the exclusive end of the range. + End uint64 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +func (r FileRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +func (r FileRange) Length() uint64 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +func (r FileRange) Contains(x uint64) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +func (r FileRange) Overlaps(r2 FileRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +func (r FileRange) IsSupersetOf(r2 FileRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +func (r FileRange) Intersect(r2 FileRange) FileRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +func (r FileRange) CanSplitAt(x uint64) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/platform/interrupt/BUILD b/pkg/sentry/platform/interrupt/BUILD deleted file mode 100644 index eeb634644..000000000 --- a/pkg/sentry/platform/interrupt/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "interrupt", - srcs = [ - "interrupt.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/interrupt", - visibility = ["//pkg/sentry:internal"], -) - -go_test( - name = "interrupt_test", - size = "small", - srcs = ["interrupt_test.go"], - embed = [":interrupt"], -) diff --git a/pkg/sentry/platform/interrupt/interrupt_state_autogen.go b/pkg/sentry/platform/interrupt/interrupt_state_autogen.go new file mode 100755 index 000000000..15e8bacdf --- /dev/null +++ b/pkg/sentry/platform/interrupt/interrupt_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package interrupt + diff --git a/pkg/sentry/platform/interrupt/interrupt_test.go b/pkg/sentry/platform/interrupt/interrupt_test.go deleted file mode 100644 index 0ecdf6e7a..000000000 --- a/pkg/sentry/platform/interrupt/interrupt_test.go +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package interrupt - -import ( - "testing" -) - -type countingReceiver struct { - interrupts int -} - -// NotifyInterrupt implements Receiver.NotifyInterrupt. -func (r *countingReceiver) NotifyInterrupt() { - r.interrupts++ -} - -func TestSingleInterruptBeforeEnable(t *testing.T) { - var ( - f Forwarder - r countingReceiver - ) - f.NotifyInterrupt() - // The interrupt should cause the first Enable to fail. - if f.Enable(&r) { - f.Disable() - t.Fatalf("Enable: got true, wanted false") - } - // The failing Enable "acknowledges" the interrupt, allowing future Enables - // to succeed. - if !f.Enable(&r) { - t.Fatalf("Enable: got false, wanted true") - } - f.Disable() -} - -func TestMultipleInterruptsBeforeEnable(t *testing.T) { - var ( - f Forwarder - r countingReceiver - ) - f.NotifyInterrupt() - f.NotifyInterrupt() - // The interrupts should cause the first Enable to fail. - if f.Enable(&r) { - f.Disable() - t.Fatalf("Enable: got true, wanted false") - } - // Interrupts are deduplicated while the Forwarder is disabled, so the - // failing Enable "acknowledges" all interrupts, allowing future Enables to - // succeed. - if !f.Enable(&r) { - t.Fatalf("Enable: got false, wanted true") - } - f.Disable() -} - -func TestSingleInterruptAfterEnable(t *testing.T) { - var ( - f Forwarder - r countingReceiver - ) - if !f.Enable(&r) { - t.Fatalf("Enable: got false, wanted true") - } - defer f.Disable() - f.NotifyInterrupt() - if r.interrupts != 1 { - t.Errorf("interrupts: got %d, wanted 1", r.interrupts) - } -} - -func TestMultipleInterruptsAfterEnable(t *testing.T) { - var ( - f Forwarder - r countingReceiver - ) - if !f.Enable(&r) { - t.Fatalf("Enable: got false, wanted true") - } - defer f.Disable() - f.NotifyInterrupt() - f.NotifyInterrupt() - if r.interrupts != 2 { - t.Errorf("interrupts: got %d, wanted 2", r.interrupts) - } -} diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD deleted file mode 100644 index ad8b95744..000000000 --- a/pkg/sentry/platform/kvm/BUILD +++ /dev/null @@ -1,68 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "kvm", - srcs = [ - "address_space.go", - "allocator.go", - "bluepill.go", - "bluepill_amd64.go", - "bluepill_amd64.s", - "bluepill_amd64_unsafe.go", - "bluepill_fault.go", - "bluepill_unsafe.go", - "context.go", - "filters.go", - "kvm.go", - "kvm_amd64.go", - "kvm_amd64_unsafe.go", - "kvm_const.go", - "machine.go", - "machine_amd64.go", - "machine_amd64_unsafe.go", - "machine_unsafe.go", - "physical_map.go", - "virtual_map.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/kvm", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/atomicbitops", - "//pkg/cpuid", - "//pkg/log", - "//pkg/procid", - "//pkg/seccomp", - "//pkg/sentry/arch", - "//pkg/sentry/platform", - "//pkg/sentry/platform/interrupt", - "//pkg/sentry/platform/ring0", - "//pkg/sentry/platform/ring0/pagetables", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/time", - "//pkg/sentry/usermem", - ], -) - -go_test( - name = "kvm_test", - srcs = [ - "kvm_test.go", - "virtual_map_test.go", - ], - embed = [":kvm"], - tags = [ - "nogotsan", - "requires-kvm", - ], - deps = [ - "//pkg/sentry/arch", - "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm/testutil", - "//pkg/sentry/platform/ring0", - "//pkg/sentry/platform/ring0/pagetables", - "//pkg/sentry/usermem", - ], -) diff --git a/pkg/sentry/platform/kvm/kvm_state_autogen.go b/pkg/sentry/platform/kvm/kvm_state_autogen.go new file mode 100755 index 000000000..5ab0e0735 --- /dev/null +++ b/pkg/sentry/platform/kvm/kvm_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package kvm + diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go deleted file mode 100644 index 30df725d4..000000000 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ /dev/null @@ -1,533 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kvm - -import ( - "math/rand" - "reflect" - "sync/atomic" - "syscall" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -var dummyFPState = (*byte)(arch.NewFloatingPointData()) - -type testHarness interface { - Errorf(format string, args ...interface{}) - Fatalf(format string, args ...interface{}) -} - -func kvmTest(t testHarness, setup func(*KVM), fn func(*vCPU) bool) { - // Create the machine. - deviceFile, err := OpenDevice() - if err != nil { - t.Fatalf("error opening device file: %v", err) - } - k, err := New(deviceFile) - if err != nil { - t.Fatalf("error creating KVM instance: %v", err) - } - defer k.machine.Destroy() - - // Call additional setup. - if setup != nil { - setup(k) - } - - var c *vCPU // For recovery. - defer func() { - redpill() - if c != nil { - k.machine.Put(c) - } - }() - for { - c = k.machine.Get() - if !fn(c) { - break - } - - // We put the vCPU here and clear the value so that the - // deferred recovery will not re-put it above. - k.machine.Put(c) - c = nil - } -} - -func bluepillTest(t testHarness, fn func(*vCPU)) { - kvmTest(t, nil, func(c *vCPU) bool { - bluepill(c) - fn(c) - return false - }) -} - -func TestKernelSyscall(t *testing.T) { - bluepillTest(t, func(c *vCPU) { - redpill() // Leave guest mode. - if got := atomic.LoadUint32(&c.state); got != vCPUUser { - t.Errorf("vCPU not in ready state: got %v", got) - } - }) -} - -func hostFault() { - defer func() { - recover() - }() - var foo *int - *foo = 0 -} - -func TestKernelFault(t *testing.T) { - hostFault() // Ensure recovery works. - bluepillTest(t, func(c *vCPU) { - hostFault() - if got := atomic.LoadUint32(&c.state); got != vCPUUser { - t.Errorf("vCPU not in ready state: got %v", got) - } - }) -} - -func TestKernelFloatingPoint(t *testing.T) { - bluepillTest(t, func(c *vCPU) { - if !testutil.FloatingPointWorks() { - t.Errorf("floating point does not work, and it should!") - } - }) -} - -func applicationTest(t testHarness, useHostMappings bool, target func(), fn func(*vCPU, *syscall.PtraceRegs, *pagetables.PageTables) bool) { - // Initialize registers & page tables. - var ( - regs syscall.PtraceRegs - pt *pagetables.PageTables - ) - testutil.SetTestTarget(®s, target) - - kvmTest(t, func(k *KVM) { - // Create new page tables. - as, _, err := k.NewAddressSpace(nil /* invalidator */) - if err != nil { - t.Fatalf("can't create new address space: %v", err) - } - pt = as.(*addressSpace).pageTables - - if useHostMappings { - // Apply the physical mappings to these page tables. - // (This is normally dangerous, since they point to - // physical pages that may not exist. This shouldn't be - // done for regular user code, but is fine for test - // purposes.) - applyPhysicalRegions(func(pr physicalRegion) bool { - pt.Map(usermem.Addr(pr.virtual), pr.length, pagetables.MapOpts{ - AccessType: usermem.AnyAccess, - User: true, - }, pr.physical) - return true // Keep iterating. - }) - } - }, func(c *vCPU) bool { - // Invoke the function with the extra data. - return fn(c, ®s, pt) - }) -} - -func TestApplicationSyscall(t *testing.T) { - applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if err != nil { - t.Errorf("application syscall with full restore failed: %v", err) - } - return false - }) - applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if err != nil { - t.Errorf("application syscall with partial restore failed: %v", err) - } - return false - }) -} - -func TestApplicationFault(t *testing.T) { - applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - testutil.SetTouchTarget(regs, nil) // Cause fault. - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if err != platform.ErrContextSignal || si.Signo != int32(syscall.SIGSEGV) { - t.Errorf("application fault with full restore got (%v, %v), expected (%v, SIGSEGV)", err, si, platform.ErrContextSignal) - } - return false - }) - applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - testutil.SetTouchTarget(regs, nil) // Cause fault. - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if err != platform.ErrContextSignal || si.Signo != int32(syscall.SIGSEGV) { - t.Errorf("application fault with partial restore got (%v, %v), expected (%v, SIGSEGV)", err, si, platform.ErrContextSignal) - } - return false - }) -} - -func TestRegistersSyscall(t *testing.T) { - applicationTest(t, true, testutil.TwiddleRegsSyscall, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - testutil.SetTestRegs(regs) // Fill values for all registers. - for { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - continue // Retry. - } else if err != nil { - t.Errorf("application register check with partial restore got unexpected error: %v", err) - } - if err := testutil.CheckTestRegs(regs, false); err != nil { - t.Errorf("application register check with partial restore failed: %v", err) - } - break // Done. - } - return false - }) -} - -func TestRegistersFault(t *testing.T) { - applicationTest(t, true, testutil.TwiddleRegsFault, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - testutil.SetTestRegs(regs) // Fill values for all registers. - for { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err == platform.ErrContextInterrupt { - continue // Retry. - } else if err != platform.ErrContextSignal || si.Signo != int32(syscall.SIGSEGV) { - t.Errorf("application register check with full restore got unexpected error: %v", err) - } - if err := testutil.CheckTestRegs(regs, true); err != nil { - t.Errorf("application register check with full restore failed: %v", err) - } - break // Done. - } - return false - }) -} - -func TestSegments(t *testing.T) { - applicationTest(t, true, testutil.TwiddleSegments, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - testutil.SetTestSegments(regs) - for { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err == platform.ErrContextInterrupt { - continue // Retry. - } else if err != nil { - t.Errorf("application segment check with full restore got unexpected error: %v", err) - } - if err := testutil.CheckTestSegments(regs); err != nil { - t.Errorf("application segment check with full restore failed: %v", err) - } - break // Done. - } - return false - }) -} - -func TestBounce(t *testing.T) { - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - go func() { - time.Sleep(time.Millisecond) - c.BounceToKernel() - }() - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err != platform.ErrContextInterrupt { - t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) - } - return false - }) - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - go func() { - time.Sleep(time.Millisecond) - c.BounceToKernel() - }() - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err != platform.ErrContextInterrupt { - t.Errorf("application full restore: got %v, wanted %v", err, platform.ErrContextInterrupt) - } - return false - }) -} - -func TestBounceStress(t *testing.T) { - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - randomSleep := func() { - // O(hundreds of microseconds) is appropriate to ensure - // different overlaps and different schedules. - if n := rand.Intn(1000); n > 100 { - time.Sleep(time.Duration(n) * time.Microsecond) - } - } - for i := 0; i < 1000; i++ { - // Start an asynchronously executing goroutine that - // calls Bounce at pseudo-random point in time. - // This should wind up calling Bounce when the - // kernel is in various stages of the switch. - go func() { - randomSleep() - c.BounceToKernel() - }() - randomSleep() - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err != platform.ErrContextInterrupt { - t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) - } - c.unlock() - randomSleep() - c.lock() - } - return false - }) -} - -func TestInvalidate(t *testing.T) { - var data uintptr // Used below. - applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - testutil.SetTouchTarget(regs, &data) // Read legitimate value. - for { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - continue // Retry. - } else if err != nil { - t.Errorf("application partial restore: got %v, wanted nil", err) - } - break // Done. - } - // Unmap the page containing data & invalidate. - pt.Unmap(usermem.Addr(reflect.ValueOf(&data).Pointer() & ^uintptr(usermem.PageSize-1)), usermem.PageSize) - for { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - Flush: true, - }, &si); err == platform.ErrContextInterrupt { - continue // Retry. - } else if err != platform.ErrContextSignal { - t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextSignal) - } - break // Success. - } - return false - }) -} - -// IsFault returns true iff the given signal represents a fault. -func IsFault(err error, si *arch.SignalInfo) bool { - return err == platform.ErrContextSignal && si.Signo == int32(syscall.SIGSEGV) -} - -func TestEmptyAddressSpace(t *testing.T) { - applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if !IsFault(err, &si) { - t.Errorf("first fault with partial restore failed got %v", err) - t.Logf("registers: %#v", ®s) - } - return false - }) - applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - FullRestore: true, - }, &si); err == platform.ErrContextInterrupt { - return true // Retry. - } else if !IsFault(err, &si) { - t.Errorf("first fault with full restore failed got %v", err) - t.Logf("registers: %#v", ®s) - } - return false - }) -} - -func TestWrongVCPU(t *testing.T) { - kvmTest(t, nil, func(c1 *vCPU) bool { - kvmTest(t, nil, func(c2 *vCPU) bool { - // Basic test, one then the other. - bluepill(c1) - bluepill(c2) - if c2.switches == 0 { - // Don't allow the test to proceed if this fails. - t.Fatalf("wrong vCPU#2 switches: vCPU1=%+v,vCPU2=%+v", c1, c2) - } - - // Alternate vCPUs; we expect to need to trigger the - // wrong vCPU path on each switch. - for i := 0; i < 100; i++ { - bluepill(c1) - bluepill(c2) - } - if count := c1.switches; count < 90 { - t.Errorf("wrong vCPU#1 switches: vCPU1=%+v,vCPU2=%+v", c1, c2) - } - if count := c2.switches; count < 90 { - t.Errorf("wrong vCPU#2 switches: vCPU1=%+v,vCPU2=%+v", c1, c2) - } - return false - }) - return false - }) - kvmTest(t, nil, func(c1 *vCPU) bool { - kvmTest(t, nil, func(c2 *vCPU) bool { - bluepill(c1) - bluepill(c2) - return false - }) - return false - }) -} - -func BenchmarkApplicationSyscall(b *testing.B) { - var ( - i int // Iteration includes machine.Get() / machine.Put(). - a int // Count for ErrContextInterrupt. - ) - applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - a++ - return true // Ignore. - } else if err != nil { - b.Fatalf("benchmark failed: %v", err) - } - i++ - return i < b.N - }) - if a != 0 { - b.Logf("ErrContextInterrupt occurred %d times (in %d iterations).", a, a+i) - } -} - -func BenchmarkKernelSyscall(b *testing.B) { - // Note that the target passed here is irrelevant, we never execute SwitchToUser. - applicationTest(b, true, testutil.Getpid, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - // iteration does not include machine.Get() / machine.Put(). - for i := 0; i < b.N; i++ { - testutil.Getpid() - } - return false - }) -} - -func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) { - // see BenchmarkApplicationSyscall. - var ( - i int - a int - ) - applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err == platform.ErrContextInterrupt { - a++ - return true // Ignore. - } else if err != nil { - b.Fatalf("benchmark failed: %v", err) - } - // This will intentionally cause the world switch. By executing - // a host syscall here, we force the transition between guest - // and host mode. - testutil.Getpid() - i++ - return i < b.N - }) - if a != 0 { - b.Logf("ErrContextInterrupt occurred %d times (in %d iterations).", a, a+i) - } -} diff --git a/pkg/sentry/platform/kvm/testutil/BUILD b/pkg/sentry/platform/kvm/testutil/BUILD deleted file mode 100644 index 77a449a8b..000000000 --- a/pkg/sentry/platform/kvm/testutil/BUILD +++ /dev/null @@ -1,15 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "testutil", - testonly = 1, - srcs = [ - "testutil.go", - "testutil_amd64.go", - "testutil_amd64.s", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil", - visibility = ["//pkg/sentry/platform/kvm:__pkg__"], -) diff --git a/pkg/sentry/platform/kvm/testutil/testutil.go b/pkg/sentry/platform/kvm/testutil/testutil.go deleted file mode 100644 index 6cf2359a3..000000000 --- a/pkg/sentry/platform/kvm/testutil/testutil.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package testutil provides common assembly stubs for testing. -package testutil - -import ( - "fmt" - "strings" -) - -// Getpid executes a trivial system call. -func Getpid() - -// Touch touches the value in the first register. -func Touch() - -// SyscallLoop executes a syscall and loops. -func SyscallLoop() - -// SpinLoop spins on the CPU. -func SpinLoop() - -// HaltLoop immediately halts and loops. -func HaltLoop() - -// TwiddleRegsFault twiddles registers then faults. -func TwiddleRegsFault() - -// TwiddleRegsSyscall twiddles registers then executes a syscall. -func TwiddleRegsSyscall() - -// TwiddleSegments reads segments into known registers. -func TwiddleSegments() - -// FloatingPointWorks is a floating point test. -// -// It returns true or false. -func FloatingPointWorks() bool - -// RegisterMismatchError is used for checking registers. -type RegisterMismatchError []string - -// Error returns a human-readable error. -func (r RegisterMismatchError) Error() string { - return strings.Join([]string(r), ";") -} - -// addRegisterMisatch allows simple chaining of register mismatches. -func addRegisterMismatch(err error, reg string, got, expected interface{}) error { - errStr := fmt.Sprintf("%s got %08x, expected %08x", reg, got, expected) - switch r := err.(type) { - case nil: - // Return a new register mismatch. - return RegisterMismatchError{errStr} - case RegisterMismatchError: - // Append the error. - r = append(r, errStr) - return r - default: - // Leave as is. - return err - } -} diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go deleted file mode 100644 index 203d71528..000000000 --- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build amd64 - -package testutil - -import ( - "reflect" - "syscall" -) - -// SetTestTarget sets the rip appropriately. -func SetTestTarget(regs *syscall.PtraceRegs, fn func()) { - regs.Rip = uint64(reflect.ValueOf(fn).Pointer()) -} - -// SetTouchTarget sets rax appropriately. -func SetTouchTarget(regs *syscall.PtraceRegs, target *uintptr) { - if target != nil { - regs.Rax = uint64(reflect.ValueOf(target).Pointer()) - } else { - regs.Rax = 0 - } -} - -// RewindSyscall rewinds a syscall RIP. -func RewindSyscall(regs *syscall.PtraceRegs) { - regs.Rip -= 2 -} - -// SetTestRegs initializes registers to known values. -func SetTestRegs(regs *syscall.PtraceRegs) { - regs.R15 = 0x15 - regs.R14 = 0x14 - regs.R13 = 0x13 - regs.R12 = 0x12 - regs.Rbp = 0xb9 - regs.Rbx = 0xb4 - regs.R11 = 0x11 - regs.R10 = 0x10 - regs.R9 = 0x09 - regs.R8 = 0x08 - regs.Rax = 0x44 - regs.Rcx = 0xc4 - regs.Rdx = 0xd4 - regs.Rsi = 0x51 - regs.Rdi = 0xd1 - regs.Rsp = 0x59 -} - -// CheckTestRegs checks that registers were twiddled per TwiddleRegs. -func CheckTestRegs(regs *syscall.PtraceRegs, full bool) (err error) { - if need := ^uint64(0x15); regs.R15 != need { - err = addRegisterMismatch(err, "R15", regs.R15, need) - } - if need := ^uint64(0x14); regs.R14 != need { - err = addRegisterMismatch(err, "R14", regs.R14, need) - } - if need := ^uint64(0x13); regs.R13 != need { - err = addRegisterMismatch(err, "R13", regs.R13, need) - } - if need := ^uint64(0x12); regs.R12 != need { - err = addRegisterMismatch(err, "R12", regs.R12, need) - } - if need := ^uint64(0xb9); regs.Rbp != need { - err = addRegisterMismatch(err, "Rbp", regs.Rbp, need) - } - if need := ^uint64(0xb4); regs.Rbx != need { - err = addRegisterMismatch(err, "Rbx", regs.Rbx, need) - } - if need := ^uint64(0x10); regs.R10 != need { - err = addRegisterMismatch(err, "R10", regs.R10, need) - } - if need := ^uint64(0x09); regs.R9 != need { - err = addRegisterMismatch(err, "R9", regs.R9, need) - } - if need := ^uint64(0x08); regs.R8 != need { - err = addRegisterMismatch(err, "R8", regs.R8, need) - } - if need := ^uint64(0x44); regs.Rax != need { - err = addRegisterMismatch(err, "Rax", regs.Rax, need) - } - if need := ^uint64(0xd4); regs.Rdx != need { - err = addRegisterMismatch(err, "Rdx", regs.Rdx, need) - } - if need := ^uint64(0x51); regs.Rsi != need { - err = addRegisterMismatch(err, "Rsi", regs.Rsi, need) - } - if need := ^uint64(0xd1); regs.Rdi != need { - err = addRegisterMismatch(err, "Rdi", regs.Rdi, need) - } - if need := ^uint64(0x59); regs.Rsp != need { - err = addRegisterMismatch(err, "Rsp", regs.Rsp, need) - } - // Rcx & R11 are ignored if !full is set. - if need := ^uint64(0x11); full && regs.R11 != need { - err = addRegisterMismatch(err, "R11", regs.R11, need) - } - if need := ^uint64(0xc4); full && regs.Rcx != need { - err = addRegisterMismatch(err, "Rcx", regs.Rcx, need) - } - return -} - -var fsData uint64 = 0x55 -var gsData uint64 = 0x85 - -// SetTestSegments initializes segments to known values. -func SetTestSegments(regs *syscall.PtraceRegs) { - regs.Fs_base = uint64(reflect.ValueOf(&fsData).Pointer()) - regs.Gs_base = uint64(reflect.ValueOf(&gsData).Pointer()) -} - -// CheckTestSegments checks that registers were twiddled per TwiddleSegments. -func CheckTestSegments(regs *syscall.PtraceRegs) (err error) { - if regs.Rax != fsData { - err = addRegisterMismatch(err, "Rax", regs.Rax, fsData) - } - if regs.Rbx != gsData { - err = addRegisterMismatch(err, "Rbx", regs.Rcx, gsData) - } - return -} diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.s b/pkg/sentry/platform/kvm/testutil/testutil_amd64.s deleted file mode 100644 index 491ec0c2a..000000000 --- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.s +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build amd64 - -// test_util_amd64.s provides AMD64 test functions. - -#include "funcdata.h" -#include "textflag.h" - -TEXT ·Getpid(SB),NOSPLIT,$0 - NO_LOCAL_POINTERS - MOVQ $39, AX // getpid - SYSCALL - RET - -TEXT ·Touch(SB),NOSPLIT,$0 -start: - MOVQ 0(AX), BX // deref AX - MOVQ $39, AX // getpid - SYSCALL - JMP start - -TEXT ·HaltLoop(SB),NOSPLIT,$0 -start: - HLT - JMP start - -TEXT ·SyscallLoop(SB),NOSPLIT,$0 -start: - SYSCALL - JMP start - -TEXT ·SpinLoop(SB),NOSPLIT,$0 -start: - JMP start - -TEXT ·FloatingPointWorks(SB),NOSPLIT,$0-8 - NO_LOCAL_POINTERS - MOVQ $1, AX - MOVQ AX, X0 - MOVQ $39, AX // getpid - SYSCALL - MOVQ X0, AX - CMPQ AX, $1 - SETEQ ret+0(FP) - RET - -#define TWIDDLE_REGS() \ - NOTQ R15; \ - NOTQ R14; \ - NOTQ R13; \ - NOTQ R12; \ - NOTQ BP; \ - NOTQ BX; \ - NOTQ R11; \ - NOTQ R10; \ - NOTQ R9; \ - NOTQ R8; \ - NOTQ AX; \ - NOTQ CX; \ - NOTQ DX; \ - NOTQ SI; \ - NOTQ DI; \ - NOTQ SP; - -TEXT ·TwiddleRegsSyscall(SB),NOSPLIT,$0 - TWIDDLE_REGS() - SYSCALL - RET // never reached - -TEXT ·TwiddleRegsFault(SB),NOSPLIT,$0 - TWIDDLE_REGS() - JMP AX // must fault - RET // never reached - -#define READ_FS() BYTE $0x64; BYTE $0x48; BYTE $0x8b; BYTE $0x00; -#define READ_GS() BYTE $0x65; BYTE $0x48; BYTE $0x8b; BYTE $0x00; - -TEXT ·TwiddleSegments(SB),NOSPLIT,$0 - MOVQ $0x0, AX - READ_GS() - MOVQ AX, BX - MOVQ $0x0, AX - READ_FS() - SYSCALL - RET // never reached diff --git a/pkg/sentry/platform/kvm/virtual_map_test.go b/pkg/sentry/platform/kvm/virtual_map_test.go deleted file mode 100644 index 6a2f145be..000000000 --- a/pkg/sentry/platform/kvm/virtual_map_test.go +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kvm - -import ( - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -type checker struct { - ok bool - accessType usermem.AccessType -} - -func (c *checker) Containing(addr uintptr) func(virtualRegion) { - c.ok = false // Reset for below calls. - return func(vr virtualRegion) { - if vr.virtual <= addr && addr < vr.virtual+vr.length { - c.ok = true - c.accessType = vr.accessType - } - } -} - -func TestParseMaps(t *testing.T) { - c := new(checker) - - // Simple test. - if err := applyVirtualRegions(c.Containing(0)); err != nil { - t.Fatalf("unexpected error: %v", err) - } - - // MMap a new page. - addr, _, errno := syscall.RawSyscall6( - syscall.SYS_MMAP, 0, usermem.PageSize, - syscall.PROT_READ|syscall.PROT_WRITE, - syscall.MAP_ANONYMOUS|syscall.MAP_PRIVATE, 0, 0) - if errno != 0 { - t.Fatalf("unexpected map error: %v", errno) - } - - // Re-parse maps. - if err := applyVirtualRegions(c.Containing(addr)); err != nil { - syscall.RawSyscall(syscall.SYS_MUNMAP, addr, usermem.PageSize, 0) - t.Fatalf("unexpected error: %v", err) - } - - // Assert that it now does contain the region. - if !c.ok { - syscall.RawSyscall(syscall.SYS_MUNMAP, addr, usermem.PageSize, 0) - t.Fatalf("updated map does not contain 0x%08x, expected true", addr) - } - - // Map the region as PROT_NONE. - newAddr, _, errno := syscall.RawSyscall6( - syscall.SYS_MMAP, addr, usermem.PageSize, - syscall.PROT_NONE, - syscall.MAP_ANONYMOUS|syscall.MAP_FIXED|syscall.MAP_PRIVATE, 0, 0) - if errno != 0 { - t.Fatalf("unexpected map error: %v", errno) - } - if newAddr != addr { - t.Fatalf("unable to remap address: got 0x%08x, wanted 0x%08x", newAddr, addr) - } - - // Re-parse maps. - if err := applyVirtualRegions(c.Containing(addr)); err != nil { - t.Fatalf("unexpected error: %v", err) - } - if !c.ok { - t.Fatalf("final map does not contain 0x%08x, expected true", addr) - } - if c.accessType.Read || c.accessType.Write || c.accessType.Execute { - t.Fatalf("final map has incorrect permissions for 0x%08x", addr) - } - - // Unmap the region. - syscall.RawSyscall(syscall.SYS_MUNMAP, addr, usermem.PageSize, 0) -} diff --git a/pkg/sentry/platform/platform_state_autogen.go b/pkg/sentry/platform/platform_state_autogen.go new file mode 100755 index 000000000..28dcd1764 --- /dev/null +++ b/pkg/sentry/platform/platform_state_autogen.go @@ -0,0 +1,24 @@ +// automatically generated by stateify. + +package platform + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *FileRange) beforeSave() {} +func (x *FileRange) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) +} + +func (x *FileRange) afterLoad() {} +func (x *FileRange) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) +} + +func init() { + state.Register("platform.FileRange", (*FileRange)(nil), state.Fns{Save: (*FileRange).save, Load: (*FileRange).load}) +} diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD deleted file mode 100644 index 1b6c54e96..000000000 --- a/pkg/sentry/platform/ptrace/BUILD +++ /dev/null @@ -1,33 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "ptrace", - srcs = [ - "filters.go", - "ptrace.go", - "ptrace_unsafe.go", - "stub_amd64.s", - "stub_unsafe.go", - "subprocess.go", - "subprocess_amd64.go", - "subprocess_linux.go", - "subprocess_linux_amd64_unsafe.go", - "subprocess_unsafe.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ptrace", - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/log", - "//pkg/procid", - "//pkg/seccomp", - "//pkg/sentry/arch", - "//pkg/sentry/platform", - "//pkg/sentry/platform/interrupt", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/usermem", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/platform/ptrace/ptrace_state_autogen.go b/pkg/sentry/platform/ptrace/ptrace_state_autogen.go new file mode 100755 index 000000000..ac83a71e7 --- /dev/null +++ b/pkg/sentry/platform/ptrace/ptrace_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package ptrace + diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD deleted file mode 100644 index 8ed6c7652..000000000 --- a/pkg/sentry/platform/ring0/BUILD +++ /dev/null @@ -1,53 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") - -go_template( - name = "defs", - srcs = [ - "defs.go", - "defs_amd64.go", - "offsets_amd64.go", - "x86.go", - ], - visibility = [":__subpackages__"], -) - -go_template_instance( - name = "defs_impl", - out = "defs_impl.go", - package = "ring0", - template = ":defs", -) - -genrule( - name = "entry_impl_amd64", - srcs = ["entry_amd64.s"], - outs = ["entry_impl_amd64.s"], - cmd = "(echo -e '// build +amd64\\n' && $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(SRCS)) > $@", - tools = ["//pkg/sentry/platform/ring0/gen_offsets"], -) - -go_library( - name = "ring0", - srcs = [ - "defs_impl.go", - "entry_amd64.go", - "entry_impl_amd64.s", - "kernel.go", - "kernel_amd64.go", - "kernel_unsafe.go", - "lib_amd64.go", - "lib_amd64.s", - "ring0.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/cpuid", - "//pkg/sentry/platform/ring0/pagetables", - "//pkg/sentry/usermem", - ], -) diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go deleted file mode 100644 index 076063f85..000000000 --- a/pkg/sentry/platform/ring0/defs.go +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ring0 - -import ( - "syscall" - - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -var ( - // UserspaceSize is the total size of userspace. - UserspaceSize = uintptr(1) << (VirtualAddressBits() - 1) - - // MaximumUserAddress is the largest possible user address. - MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1) - - // KernelStartAddress is the starting kernel address. - KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1) -) - -// Kernel is a global kernel object. -// -// This contains global state, shared by multiple CPUs. -type Kernel struct { - KernelArchState -} - -// Hooks are hooks for kernel functions. -type Hooks interface { - // KernelSyscall is called for kernel system calls. - // - // Return from this call will restore registers and return to the kernel: the - // registers must be modified directly. - // - // If this function is not provided, a kernel exception results in halt. - // - // This must be go:nosplit, as this will be on the interrupt stack. - // Closures are permitted, as the pointer to the closure frame is not - // passed on the stack. - KernelSyscall() - - // KernelException handles an exception during kernel execution. - // - // Return from this call will restore registers and return to the kernel: the - // registers must be modified directly. - // - // If this function is not provided, a kernel exception results in halt. - // - // This must be go:nosplit, as this will be on the interrupt stack. - // Closures are permitted, as the pointer to the closure frame is not - // passed on the stack. - KernelException(Vector) -} - -// CPU is the per-CPU struct. -type CPU struct { - // self is a self reference. - // - // This is always guaranteed to be at offset zero. - self *CPU - - // kernel is reference to the kernel that this CPU was initialized - // with. This reference is kept for garbage collection purposes: CPU - // registers may refer to objects within the Kernel object that cannot - // be safely freed. - kernel *Kernel - - // CPUArchState is architecture-specific state. - CPUArchState - - // registers is a set of registers; these may be used on kernel system - // calls and exceptions via the Registers function. - registers syscall.PtraceRegs - - // hooks are kernel hooks. - hooks Hooks -} - -// Registers returns a modifiable-copy of the kernel registers. -// -// This is explicitly safe to call during KernelException and KernelSyscall. -// -//go:nosplit -func (c *CPU) Registers() *syscall.PtraceRegs { - return &c.registers -} - -// SwitchOpts are passed to the Switch function. -type SwitchOpts struct { - // Registers are the user register state. - Registers *syscall.PtraceRegs - - // FloatingPointState is a byte pointer where floating point state is - // saved and restored. - FloatingPointState *byte - - // PageTables are the application page tables. - PageTables *pagetables.PageTables - - // Flush indicates that a TLB flush should be forced on switch. - Flush bool - - // FullRestore indicates that an iret-based restore should be used. - FullRestore bool - - // SwitchArchOpts are architecture-specific options. - SwitchArchOpts -} diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go deleted file mode 100644 index 7206322b1..000000000 --- a/pkg/sentry/platform/ring0/defs_amd64.go +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build amd64 - -package ring0 - -import ( - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" -) - -// Segment indices and Selectors. -const ( - // Index into GDT array. - _ = iota // Null descriptor first. - _ // Reserved (Linux is kernel 32). - segKcode // Kernel code (64-bit). - segKdata // Kernel data. - segUcode32 // User code (32-bit). - segUdata // User data. - segUcode64 // User code (64-bit). - segTss // Task segment descriptor. - segTssHi // Upper bits for TSS. - segLast // Last segment (terminal, not included). -) - -// Selectors. -const ( - Kcode Selector = segKcode << 3 - Kdata Selector = segKdata << 3 - Ucode32 Selector = (segUcode32 << 3) | 3 - Udata Selector = (segUdata << 3) | 3 - Ucode64 Selector = (segUcode64 << 3) | 3 - Tss Selector = segTss << 3 -) - -// Standard segments. -var ( - UserCodeSegment32 SegmentDescriptor - UserDataSegment SegmentDescriptor - UserCodeSegment64 SegmentDescriptor - KernelCodeSegment SegmentDescriptor - KernelDataSegment SegmentDescriptor -) - -// KernelOpts has initialization options for the kernel. -type KernelOpts struct { - // PageTables are the kernel pagetables; this must be provided. - PageTables *pagetables.PageTables -} - -// KernelArchState contains architecture-specific state. -type KernelArchState struct { - KernelOpts - - // globalIDT is our set of interrupt gates. - globalIDT idt64 -} - -// CPUArchState contains CPU-specific arch state. -type CPUArchState struct { - // stack is the stack used for interrupts on this CPU. - stack [256]byte - - // errorCode is the error code from the last exception. - errorCode uintptr - - // errorType indicates the type of error code here, it is always set - // along with the errorCode value above. - // - // It will either by 1, which indicates a user error, or 0 indicating a - // kernel error. If the error code below returns false (kernel error), - // then it cannot provide relevant information about the last - // exception. - errorType uintptr - - // gdt is the CPU's descriptor table. - gdt descriptorTable - - // tss is the CPU's task state. - tss TaskState64 -} - -// ErrorCode returns the last error code. -// -// The returned boolean indicates whether the error code corresponds to the -// last user error or not. If it does not, then fault information must be -// ignored. This is generally the result of a kernel fault while servicing a -// user fault. -// -//go:nosplit -func (c *CPU) ErrorCode() (value uintptr, user bool) { - return c.errorCode, c.errorType != 0 -} - -// ClearErrorCode resets the error code. -// -//go:nosplit -func (c *CPU) ClearErrorCode() { - c.errorCode = 0 // No code. - c.errorType = 1 // User mode. -} - -// SwitchArchOpts are embedded in SwitchOpts. -type SwitchArchOpts struct { - // UserPCID indicates that the application PCID to be used on switch, - // assuming that PCIDs are supported. - // - // Per pagetables_x86.go, a zero PCID implies a flush. - UserPCID uint16 - - // KernelPCID indicates that the kernel PCID to be used on return, - // assuming that PCIDs are supported. - // - // Per pagetables_x86.go, a zero PCID implies a flush. - KernelPCID uint16 -} - -func init() { - KernelCodeSegment.setCode64(0, 0, 0) - KernelDataSegment.setData(0, 0xffffffff, 0) - UserCodeSegment32.setCode64(0, 0, 3) - UserDataSegment.setData(0, 0xffffffff, 3) - UserCodeSegment64.setCode64(0, 0, 3) -} diff --git a/pkg/sentry/platform/ring0/defs_impl.go b/pkg/sentry/platform/ring0/defs_impl.go new file mode 100755 index 000000000..d4bfc5a4a --- /dev/null +++ b/pkg/sentry/platform/ring0/defs_impl.go @@ -0,0 +1,538 @@ +package ring0 + +import ( + "syscall" + + "fmt" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "io" + "reflect" +) + +var ( + // UserspaceSize is the total size of userspace. + UserspaceSize = uintptr(1) << (VirtualAddressBits() - 1) + + // MaximumUserAddress is the largest possible user address. + MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1) + + // KernelStartAddress is the starting kernel address. + KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1) +) + +// Kernel is a global kernel object. +// +// This contains global state, shared by multiple CPUs. +type Kernel struct { + KernelArchState +} + +// Hooks are hooks for kernel functions. +type Hooks interface { + // KernelSyscall is called for kernel system calls. + // + // Return from this call will restore registers and return to the kernel: the + // registers must be modified directly. + // + // If this function is not provided, a kernel exception results in halt. + // + // This must be go:nosplit, as this will be on the interrupt stack. + // Closures are permitted, as the pointer to the closure frame is not + // passed on the stack. + KernelSyscall() + + // KernelException handles an exception during kernel execution. + // + // Return from this call will restore registers and return to the kernel: the + // registers must be modified directly. + // + // If this function is not provided, a kernel exception results in halt. + // + // This must be go:nosplit, as this will be on the interrupt stack. + // Closures are permitted, as the pointer to the closure frame is not + // passed on the stack. + KernelException(Vector) +} + +// CPU is the per-CPU struct. +type CPU struct { + // self is a self reference. + // + // This is always guaranteed to be at offset zero. + self *CPU + + // kernel is reference to the kernel that this CPU was initialized + // with. This reference is kept for garbage collection purposes: CPU + // registers may refer to objects within the Kernel object that cannot + // be safely freed. + kernel *Kernel + + // CPUArchState is architecture-specific state. + CPUArchState + + // registers is a set of registers; these may be used on kernel system + // calls and exceptions via the Registers function. + registers syscall.PtraceRegs + + // hooks are kernel hooks. + hooks Hooks +} + +// Registers returns a modifiable-copy of the kernel registers. +// +// This is explicitly safe to call during KernelException and KernelSyscall. +// +//go:nosplit +func (c *CPU) Registers() *syscall.PtraceRegs { + return &c.registers +} + +// SwitchOpts are passed to the Switch function. +type SwitchOpts struct { + // Registers are the user register state. + Registers *syscall.PtraceRegs + + // FloatingPointState is a byte pointer where floating point state is + // saved and restored. + FloatingPointState *byte + + // PageTables are the application page tables. + PageTables *pagetables.PageTables + + // Flush indicates that a TLB flush should be forced on switch. + Flush bool + + // FullRestore indicates that an iret-based restore should be used. + FullRestore bool + + // SwitchArchOpts are architecture-specific options. + SwitchArchOpts +} + +// Segment indices and Selectors. +const ( + // Index into GDT array. + _ = iota // Null descriptor first. + _ // Reserved (Linux is kernel 32). + segKcode // Kernel code (64-bit). + segKdata // Kernel data. + segUcode32 // User code (32-bit). + segUdata // User data. + segUcode64 // User code (64-bit). + segTss // Task segment descriptor. + segTssHi // Upper bits for TSS. + segLast // Last segment (terminal, not included). +) + +// Selectors. +const ( + Kcode Selector = segKcode << 3 + Kdata Selector = segKdata << 3 + Ucode32 Selector = (segUcode32 << 3) | 3 + Udata Selector = (segUdata << 3) | 3 + Ucode64 Selector = (segUcode64 << 3) | 3 + Tss Selector = segTss << 3 +) + +// Standard segments. +var ( + UserCodeSegment32 SegmentDescriptor + UserDataSegment SegmentDescriptor + UserCodeSegment64 SegmentDescriptor + KernelCodeSegment SegmentDescriptor + KernelDataSegment SegmentDescriptor +) + +// KernelOpts has initialization options for the kernel. +type KernelOpts struct { + // PageTables are the kernel pagetables; this must be provided. + PageTables *pagetables.PageTables +} + +// KernelArchState contains architecture-specific state. +type KernelArchState struct { + KernelOpts + + // globalIDT is our set of interrupt gates. + globalIDT idt64 +} + +// CPUArchState contains CPU-specific arch state. +type CPUArchState struct { + // stack is the stack used for interrupts on this CPU. + stack [256]byte + + // errorCode is the error code from the last exception. + errorCode uintptr + + // errorType indicates the type of error code here, it is always set + // along with the errorCode value above. + // + // It will either by 1, which indicates a user error, or 0 indicating a + // kernel error. If the error code below returns false (kernel error), + // then it cannot provide relevant information about the last + // exception. + errorType uintptr + + // gdt is the CPU's descriptor table. + gdt descriptorTable + + // tss is the CPU's task state. + tss TaskState64 +} + +// ErrorCode returns the last error code. +// +// The returned boolean indicates whether the error code corresponds to the +// last user error or not. If it does not, then fault information must be +// ignored. This is generally the result of a kernel fault while servicing a +// user fault. +// +//go:nosplit +func (c *CPU) ErrorCode() (value uintptr, user bool) { + return c.errorCode, c.errorType != 0 +} + +// ClearErrorCode resets the error code. +// +//go:nosplit +func (c *CPU) ClearErrorCode() { + c.errorCode = 0 + c.errorType = 1 +} + +// SwitchArchOpts are embedded in SwitchOpts. +type SwitchArchOpts struct { + // UserPCID indicates that the application PCID to be used on switch, + // assuming that PCIDs are supported. + // + // Per pagetables_x86.go, a zero PCID implies a flush. + UserPCID uint16 + + // KernelPCID indicates that the kernel PCID to be used on return, + // assuming that PCIDs are supported. + // + // Per pagetables_x86.go, a zero PCID implies a flush. + KernelPCID uint16 +} + +func init() { + KernelCodeSegment.setCode64(0, 0, 0) + KernelDataSegment.setData(0, 0xffffffff, 0) + UserCodeSegment32.setCode64(0, 0, 3) + UserDataSegment.setData(0, 0xffffffff, 3) + UserCodeSegment64.setCode64(0, 0, 3) +} + +// Emit prints architecture-specific offsets. +func Emit(w io.Writer) { + fmt.Fprintf(w, "// Automatically generated, do not edit.\n") + + c := &CPU{} + fmt.Fprintf(w, "\n// CPU offsets.\n") + fmt.Fprintf(w, "#define CPU_SELF 0x%02x\n", reflect.ValueOf(&c.self).Pointer()-reflect.ValueOf(c).Pointer()) + fmt.Fprintf(w, "#define CPU_REGISTERS 0x%02x\n", reflect.ValueOf(&c.registers).Pointer()-reflect.ValueOf(c).Pointer()) + fmt.Fprintf(w, "#define CPU_STACK_TOP 0x%02x\n", reflect.ValueOf(&c.stack[0]).Pointer()-reflect.ValueOf(c).Pointer()+uintptr(len(c.stack))) + fmt.Fprintf(w, "#define CPU_ERROR_CODE 0x%02x\n", reflect.ValueOf(&c.errorCode).Pointer()-reflect.ValueOf(c).Pointer()) + fmt.Fprintf(w, "#define CPU_ERROR_TYPE 0x%02x\n", reflect.ValueOf(&c.errorType).Pointer()-reflect.ValueOf(c).Pointer()) + + fmt.Fprintf(w, "\n// Bits.\n") + fmt.Fprintf(w, "#define _RFLAGS_IF 0x%02x\n", _RFLAGS_IF) + fmt.Fprintf(w, "#define _KERNEL_FLAGS 0x%02x\n", KernelFlagsSet) + + fmt.Fprintf(w, "\n// Vectors.\n") + fmt.Fprintf(w, "#define DivideByZero 0x%02x\n", DivideByZero) + fmt.Fprintf(w, "#define Debug 0x%02x\n", Debug) + fmt.Fprintf(w, "#define NMI 0x%02x\n", NMI) + fmt.Fprintf(w, "#define Breakpoint 0x%02x\n", Breakpoint) + fmt.Fprintf(w, "#define Overflow 0x%02x\n", Overflow) + fmt.Fprintf(w, "#define BoundRangeExceeded 0x%02x\n", BoundRangeExceeded) + fmt.Fprintf(w, "#define InvalidOpcode 0x%02x\n", InvalidOpcode) + fmt.Fprintf(w, "#define DeviceNotAvailable 0x%02x\n", DeviceNotAvailable) + fmt.Fprintf(w, "#define DoubleFault 0x%02x\n", DoubleFault) + fmt.Fprintf(w, "#define CoprocessorSegmentOverrun 0x%02x\n", CoprocessorSegmentOverrun) + fmt.Fprintf(w, "#define InvalidTSS 0x%02x\n", InvalidTSS) + fmt.Fprintf(w, "#define SegmentNotPresent 0x%02x\n", SegmentNotPresent) + fmt.Fprintf(w, "#define StackSegmentFault 0x%02x\n", StackSegmentFault) + fmt.Fprintf(w, "#define GeneralProtectionFault 0x%02x\n", GeneralProtectionFault) + fmt.Fprintf(w, "#define PageFault 0x%02x\n", PageFault) + fmt.Fprintf(w, "#define X87FloatingPointException 0x%02x\n", X87FloatingPointException) + fmt.Fprintf(w, "#define AlignmentCheck 0x%02x\n", AlignmentCheck) + fmt.Fprintf(w, "#define MachineCheck 0x%02x\n", MachineCheck) + fmt.Fprintf(w, "#define SIMDFloatingPointException 0x%02x\n", SIMDFloatingPointException) + fmt.Fprintf(w, "#define VirtualizationException 0x%02x\n", VirtualizationException) + fmt.Fprintf(w, "#define SecurityException 0x%02x\n", SecurityException) + fmt.Fprintf(w, "#define SyscallInt80 0x%02x\n", SyscallInt80) + fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall) + + p := &syscall.PtraceRegs{} + fmt.Fprintf(w, "\n// Ptrace registers.\n") + fmt.Fprintf(w, "#define PTRACE_R15 0x%02x\n", reflect.ValueOf(&p.R15).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_R14 0x%02x\n", reflect.ValueOf(&p.R14).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_R13 0x%02x\n", reflect.ValueOf(&p.R13).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_R12 0x%02x\n", reflect.ValueOf(&p.R12).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_RBP 0x%02x\n", reflect.ValueOf(&p.Rbp).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_RBX 0x%02x\n", reflect.ValueOf(&p.Rbx).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_R11 0x%02x\n", reflect.ValueOf(&p.R11).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_R10 0x%02x\n", reflect.ValueOf(&p.R10).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_R9 0x%02x\n", reflect.ValueOf(&p.R9).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_R8 0x%02x\n", reflect.ValueOf(&p.R8).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_RAX 0x%02x\n", reflect.ValueOf(&p.Rax).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_RCX 0x%02x\n", reflect.ValueOf(&p.Rcx).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_RDX 0x%02x\n", reflect.ValueOf(&p.Rdx).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_RSI 0x%02x\n", reflect.ValueOf(&p.Rsi).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_RDI 0x%02x\n", reflect.ValueOf(&p.Rdi).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_ORIGRAX 0x%02x\n", reflect.ValueOf(&p.Orig_rax).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_RIP 0x%02x\n", reflect.ValueOf(&p.Rip).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_CS 0x%02x\n", reflect.ValueOf(&p.Cs).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_FLAGS 0x%02x\n", reflect.ValueOf(&p.Eflags).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_RSP 0x%02x\n", reflect.ValueOf(&p.Rsp).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_SS 0x%02x\n", reflect.ValueOf(&p.Ss).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_FS 0x%02x\n", reflect.ValueOf(&p.Fs_base).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_GS 0x%02x\n", reflect.ValueOf(&p.Gs_base).Pointer()-reflect.ValueOf(p).Pointer()) +} + +// Useful bits. +const ( + _CR0_PE = 1 << 0 + _CR0_ET = 1 << 4 + _CR0_AM = 1 << 18 + _CR0_PG = 1 << 31 + + _CR4_PSE = 1 << 4 + _CR4_PAE = 1 << 5 + _CR4_PGE = 1 << 7 + _CR4_OSFXSR = 1 << 9 + _CR4_OSXMMEXCPT = 1 << 10 + _CR4_FSGSBASE = 1 << 16 + _CR4_PCIDE = 1 << 17 + _CR4_OSXSAVE = 1 << 18 + _CR4_SMEP = 1 << 20 + + _RFLAGS_AC = 1 << 18 + _RFLAGS_NT = 1 << 14 + _RFLAGS_IOPL = 3 << 12 + _RFLAGS_DF = 1 << 10 + _RFLAGS_IF = 1 << 9 + _RFLAGS_STEP = 1 << 8 + _RFLAGS_RESERVED = 1 << 1 + + _EFER_SCE = 0x001 + _EFER_LME = 0x100 + _EFER_LMA = 0x400 + _EFER_NX = 0x800 + + _MSR_STAR = 0xc0000081 + _MSR_LSTAR = 0xc0000082 + _MSR_CSTAR = 0xc0000083 + _MSR_SYSCALL_MASK = 0xc0000084 + _MSR_PLATFORM_INFO = 0xce + _MSR_MISC_FEATURES = 0x140 + + _PLATFORM_INFO_CPUID_FAULT = 1 << 31 + + _MISC_FEATURE_CPUID_TRAP = 0x1 +) + +const ( + // KernelFlagsSet should always be set in the kernel. + KernelFlagsSet = _RFLAGS_RESERVED + + // UserFlagsSet are always set in userspace. + UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF + + // KernelFlagsClear should always be clear in the kernel. + KernelFlagsClear = _RFLAGS_STEP | _RFLAGS_IF | _RFLAGS_IOPL | _RFLAGS_AC | _RFLAGS_NT + + // UserFlagsClear are always cleared in userspace. + UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL +) + +// Vector is an exception vector. +type Vector uintptr + +// Exception vectors. +const ( + DivideByZero Vector = iota + Debug + NMI + Breakpoint + Overflow + BoundRangeExceeded + InvalidOpcode + DeviceNotAvailable + DoubleFault + CoprocessorSegmentOverrun + InvalidTSS + SegmentNotPresent + StackSegmentFault + GeneralProtectionFault + PageFault + _ + X87FloatingPointException + AlignmentCheck + MachineCheck + SIMDFloatingPointException + VirtualizationException + SecurityException = 0x1e + SyscallInt80 = 0x80 + _NR_INTERRUPTS = SyscallInt80 + 1 +) + +// System call vectors. +const ( + Syscall Vector = _NR_INTERRUPTS +) + +// VirtualAddressBits returns the number bits available for virtual addresses. +// +// Note that sign-extension semantics apply to the highest order bit. +// +// FIXME(b/69382326): This should use the cpuid passed to Init. +func VirtualAddressBits() uint32 { + ax, _, _, _ := cpuid.HostID(0x80000008, 0) + return (ax >> 8) & 0xff +} + +// PhysicalAddressBits returns the number of bits available for physical addresses. +// +// FIXME(b/69382326): This should use the cpuid passed to Init. +func PhysicalAddressBits() uint32 { + ax, _, _, _ := cpuid.HostID(0x80000008, 0) + return ax & 0xff +} + +// Selector is a segment Selector. +type Selector uint16 + +// SegmentDescriptor is a segment descriptor. +type SegmentDescriptor struct { + bits [2]uint32 +} + +// descriptorTable is a collection of descriptors. +type descriptorTable [32]SegmentDescriptor + +// SegmentDescriptorFlags are typed flags within a descriptor. +type SegmentDescriptorFlags uint32 + +// SegmentDescriptorFlag declarations. +const ( + SegmentDescriptorAccess SegmentDescriptorFlags = 1 << 8 // Access bit (always set). + SegmentDescriptorWrite = 1 << 9 // Write permission. + SegmentDescriptorExpandDown = 1 << 10 // Grows down, not used. + SegmentDescriptorExecute = 1 << 11 // Execute permission. + SegmentDescriptorSystem = 1 << 12 // Zero => system, 1 => user code/data. + SegmentDescriptorPresent = 1 << 15 // Present. + SegmentDescriptorAVL = 1 << 20 // Available. + SegmentDescriptorLong = 1 << 21 // Long mode. + SegmentDescriptorDB = 1 << 22 // 16 or 32-bit. + SegmentDescriptorG = 1 << 23 // Granularity: page or byte. +) + +// Base returns the descriptor's base linear address. +func (d *SegmentDescriptor) Base() uint32 { + return d.bits[1]&0xFF000000 | (d.bits[1]&0x000000FF)<<16 | d.bits[0]>>16 +} + +// Limit returns the descriptor size. +func (d *SegmentDescriptor) Limit() uint32 { + l := d.bits[0]&0xFFFF | d.bits[1]&0xF0000 + if d.bits[1]&uint32(SegmentDescriptorG) != 0 { + l <<= 12 + l |= 0xFFF + } + return l +} + +// Flags returns descriptor flags. +func (d *SegmentDescriptor) Flags() SegmentDescriptorFlags { + return SegmentDescriptorFlags(d.bits[1] & 0x00F09F00) +} + +// DPL returns the descriptor privilege level. +func (d *SegmentDescriptor) DPL() int { + return int((d.bits[1] >> 13) & 3) +} + +func (d *SegmentDescriptor) setNull() { + d.bits[0] = 0 + d.bits[1] = 0 +} + +func (d *SegmentDescriptor) set(base, limit uint32, dpl int, flags SegmentDescriptorFlags) { + flags |= SegmentDescriptorPresent + if limit>>12 != 0 { + limit >>= 12 + flags |= SegmentDescriptorG + } + d.bits[0] = base<<16 | limit&0xFFFF + d.bits[1] = base&0xFF000000 | (base>>16)&0xFF | limit&0x000F0000 | uint32(flags) | uint32(dpl)<<13 +} + +func (d *SegmentDescriptor) setCode32(base, limit uint32, dpl int) { + d.set(base, limit, dpl, + SegmentDescriptorDB| + SegmentDescriptorExecute| + SegmentDescriptorSystem) +} + +func (d *SegmentDescriptor) setCode64(base, limit uint32, dpl int) { + d.set(base, limit, dpl, + SegmentDescriptorG| + SegmentDescriptorLong| + SegmentDescriptorExecute| + SegmentDescriptorSystem) +} + +func (d *SegmentDescriptor) setData(base, limit uint32, dpl int) { + d.set(base, limit, dpl, + SegmentDescriptorWrite| + SegmentDescriptorSystem) +} + +// setHi is only used for the TSS segment, which is magically 64-bits. +func (d *SegmentDescriptor) setHi(base uint32) { + d.bits[0] = base + d.bits[1] = 0 +} + +// Gate64 is a 64-bit task, trap, or interrupt gate. +type Gate64 struct { + bits [4]uint32 +} + +// idt64 is a 64-bit interrupt descriptor table. +type idt64 [_NR_INTERRUPTS]Gate64 + +func (g *Gate64) setInterrupt(cs Selector, rip uint64, dpl int, ist int) { + g.bits[0] = uint32(cs)<<16 | uint32(rip)&0xFFFF + g.bits[1] = uint32(rip)&0xFFFF0000 | SegmentDescriptorPresent | uint32(dpl)<<13 | 14<<8 | uint32(ist)&0x7 + g.bits[2] = uint32(rip >> 32) +} + +func (g *Gate64) setTrap(cs Selector, rip uint64, dpl int, ist int) { + g.setInterrupt(cs, rip, dpl, ist) + g.bits[1] |= 1 << 8 +} + +// TaskState64 is a 64-bit task state structure. +type TaskState64 struct { + _ uint32 + rsp0Lo, rsp0Hi uint32 + rsp1Lo, rsp1Hi uint32 + rsp2Lo, rsp2Hi uint32 + _ [2]uint32 + ist1Lo, ist1Hi uint32 + ist2Lo, ist2Hi uint32 + ist3Lo, ist3Hi uint32 + ist4Lo, ist4Hi uint32 + ist5Lo, ist5Hi uint32 + ist6Lo, ist6Hi uint32 + ist7Lo, ist7Hi uint32 + _ [2]uint32 + _ uint16 + ioPerm uint16 +} diff --git a/pkg/sentry/platform/ring0/entry_amd64.s b/pkg/sentry/platform/ring0/entry_impl_amd64.s index 02df38331..daba45f9d 100644..100755 --- a/pkg/sentry/platform/ring0/entry_amd64.s +++ b/pkg/sentry/platform/ring0/entry_impl_amd64.s @@ -1,3 +1,67 @@ +// build +amd64 + +// Automatically generated, do not edit. + +// CPU offsets. +#define CPU_SELF 0x00 +#define CPU_REGISTERS 0x288 +#define CPU_STACK_TOP 0x110 +#define CPU_ERROR_CODE 0x110 +#define CPU_ERROR_TYPE 0x118 + +// Bits. +#define _RFLAGS_IF 0x200 +#define _KERNEL_FLAGS 0x02 + +// Vectors. +#define DivideByZero 0x00 +#define Debug 0x01 +#define NMI 0x02 +#define Breakpoint 0x03 +#define Overflow 0x04 +#define BoundRangeExceeded 0x05 +#define InvalidOpcode 0x06 +#define DeviceNotAvailable 0x07 +#define DoubleFault 0x08 +#define CoprocessorSegmentOverrun 0x09 +#define InvalidTSS 0x0a +#define SegmentNotPresent 0x0b +#define StackSegmentFault 0x0c +#define GeneralProtectionFault 0x0d +#define PageFault 0x0e +#define X87FloatingPointException 0x10 +#define AlignmentCheck 0x11 +#define MachineCheck 0x12 +#define SIMDFloatingPointException 0x13 +#define VirtualizationException 0x14 +#define SecurityException 0x1e +#define SyscallInt80 0x80 +#define Syscall 0x81 + +// Ptrace registers. +#define PTRACE_R15 0x00 +#define PTRACE_R14 0x08 +#define PTRACE_R13 0x10 +#define PTRACE_R12 0x18 +#define PTRACE_RBP 0x20 +#define PTRACE_RBX 0x28 +#define PTRACE_R11 0x30 +#define PTRACE_R10 0x38 +#define PTRACE_R9 0x40 +#define PTRACE_R8 0x48 +#define PTRACE_RAX 0x50 +#define PTRACE_RCX 0x58 +#define PTRACE_RDX 0x60 +#define PTRACE_RSI 0x68 +#define PTRACE_RDI 0x70 +#define PTRACE_ORIGRAX 0x78 +#define PTRACE_RIP 0x80 +#define PTRACE_CS 0x88 +#define PTRACE_FLAGS 0x90 +#define PTRACE_RSP 0x98 +#define PTRACE_SS 0xa0 +#define PTRACE_FS 0xa8 +#define PTRACE_GS 0xb0 // Copyright 2018 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/pkg/sentry/platform/ring0/gen_offsets/BUILD b/pkg/sentry/platform/ring0/gen_offsets/BUILD deleted file mode 100644 index d7029d5a9..000000000 --- a/pkg/sentry/platform/ring0/gen_offsets/BUILD +++ /dev/null @@ -1,26 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") - -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") - -go_template_instance( - name = "defs_impl", - out = "defs_impl.go", - package = "main", - template = "//pkg/sentry/platform/ring0:defs", -) - -go_binary( - name = "gen_offsets", - srcs = [ - "defs_impl.go", - "main.go", - ], - visibility = ["//pkg/sentry/platform/ring0:__pkg__"], - deps = [ - "//pkg/cpuid", - "//pkg/sentry/platform/ring0/pagetables", - "//pkg/sentry/usermem", - ], -) diff --git a/pkg/sentry/platform/ring0/gen_offsets/main.go b/pkg/sentry/platform/ring0/gen_offsets/main.go deleted file mode 100644 index a4927da2f..000000000 --- a/pkg/sentry/platform/ring0/gen_offsets/main.go +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary gen_offsets is a helper for generating offset headers. -package main - -import ( - "os" -) - -func main() { - Emit(os.Stdout) -} diff --git a/pkg/sentry/platform/ring0/offsets_amd64.go b/pkg/sentry/platform/ring0/offsets_amd64.go deleted file mode 100644 index 85cc3fdad..000000000 --- a/pkg/sentry/platform/ring0/offsets_amd64.go +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build amd64 - -package ring0 - -import ( - "fmt" - "io" - "reflect" - "syscall" -) - -// Emit prints architecture-specific offsets. -func Emit(w io.Writer) { - fmt.Fprintf(w, "// Automatically generated, do not edit.\n") - - c := &CPU{} - fmt.Fprintf(w, "\n// CPU offsets.\n") - fmt.Fprintf(w, "#define CPU_SELF 0x%02x\n", reflect.ValueOf(&c.self).Pointer()-reflect.ValueOf(c).Pointer()) - fmt.Fprintf(w, "#define CPU_REGISTERS 0x%02x\n", reflect.ValueOf(&c.registers).Pointer()-reflect.ValueOf(c).Pointer()) - fmt.Fprintf(w, "#define CPU_STACK_TOP 0x%02x\n", reflect.ValueOf(&c.stack[0]).Pointer()-reflect.ValueOf(c).Pointer()+uintptr(len(c.stack))) - fmt.Fprintf(w, "#define CPU_ERROR_CODE 0x%02x\n", reflect.ValueOf(&c.errorCode).Pointer()-reflect.ValueOf(c).Pointer()) - fmt.Fprintf(w, "#define CPU_ERROR_TYPE 0x%02x\n", reflect.ValueOf(&c.errorType).Pointer()-reflect.ValueOf(c).Pointer()) - - fmt.Fprintf(w, "\n// Bits.\n") - fmt.Fprintf(w, "#define _RFLAGS_IF 0x%02x\n", _RFLAGS_IF) - fmt.Fprintf(w, "#define _KERNEL_FLAGS 0x%02x\n", KernelFlagsSet) - - fmt.Fprintf(w, "\n// Vectors.\n") - fmt.Fprintf(w, "#define DivideByZero 0x%02x\n", DivideByZero) - fmt.Fprintf(w, "#define Debug 0x%02x\n", Debug) - fmt.Fprintf(w, "#define NMI 0x%02x\n", NMI) - fmt.Fprintf(w, "#define Breakpoint 0x%02x\n", Breakpoint) - fmt.Fprintf(w, "#define Overflow 0x%02x\n", Overflow) - fmt.Fprintf(w, "#define BoundRangeExceeded 0x%02x\n", BoundRangeExceeded) - fmt.Fprintf(w, "#define InvalidOpcode 0x%02x\n", InvalidOpcode) - fmt.Fprintf(w, "#define DeviceNotAvailable 0x%02x\n", DeviceNotAvailable) - fmt.Fprintf(w, "#define DoubleFault 0x%02x\n", DoubleFault) - fmt.Fprintf(w, "#define CoprocessorSegmentOverrun 0x%02x\n", CoprocessorSegmentOverrun) - fmt.Fprintf(w, "#define InvalidTSS 0x%02x\n", InvalidTSS) - fmt.Fprintf(w, "#define SegmentNotPresent 0x%02x\n", SegmentNotPresent) - fmt.Fprintf(w, "#define StackSegmentFault 0x%02x\n", StackSegmentFault) - fmt.Fprintf(w, "#define GeneralProtectionFault 0x%02x\n", GeneralProtectionFault) - fmt.Fprintf(w, "#define PageFault 0x%02x\n", PageFault) - fmt.Fprintf(w, "#define X87FloatingPointException 0x%02x\n", X87FloatingPointException) - fmt.Fprintf(w, "#define AlignmentCheck 0x%02x\n", AlignmentCheck) - fmt.Fprintf(w, "#define MachineCheck 0x%02x\n", MachineCheck) - fmt.Fprintf(w, "#define SIMDFloatingPointException 0x%02x\n", SIMDFloatingPointException) - fmt.Fprintf(w, "#define VirtualizationException 0x%02x\n", VirtualizationException) - fmt.Fprintf(w, "#define SecurityException 0x%02x\n", SecurityException) - fmt.Fprintf(w, "#define SyscallInt80 0x%02x\n", SyscallInt80) - fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall) - - p := &syscall.PtraceRegs{} - fmt.Fprintf(w, "\n// Ptrace registers.\n") - fmt.Fprintf(w, "#define PTRACE_R15 0x%02x\n", reflect.ValueOf(&p.R15).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_R14 0x%02x\n", reflect.ValueOf(&p.R14).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_R13 0x%02x\n", reflect.ValueOf(&p.R13).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_R12 0x%02x\n", reflect.ValueOf(&p.R12).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_RBP 0x%02x\n", reflect.ValueOf(&p.Rbp).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_RBX 0x%02x\n", reflect.ValueOf(&p.Rbx).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_R11 0x%02x\n", reflect.ValueOf(&p.R11).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_R10 0x%02x\n", reflect.ValueOf(&p.R10).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_R9 0x%02x\n", reflect.ValueOf(&p.R9).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_R8 0x%02x\n", reflect.ValueOf(&p.R8).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_RAX 0x%02x\n", reflect.ValueOf(&p.Rax).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_RCX 0x%02x\n", reflect.ValueOf(&p.Rcx).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_RDX 0x%02x\n", reflect.ValueOf(&p.Rdx).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_RSI 0x%02x\n", reflect.ValueOf(&p.Rsi).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_RDI 0x%02x\n", reflect.ValueOf(&p.Rdi).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_ORIGRAX 0x%02x\n", reflect.ValueOf(&p.Orig_rax).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_RIP 0x%02x\n", reflect.ValueOf(&p.Rip).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_CS 0x%02x\n", reflect.ValueOf(&p.Cs).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_FLAGS 0x%02x\n", reflect.ValueOf(&p.Eflags).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_RSP 0x%02x\n", reflect.ValueOf(&p.Rsp).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_SS 0x%02x\n", reflect.ValueOf(&p.Ss).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_FS 0x%02x\n", reflect.ValueOf(&p.Fs_base).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_GS 0x%02x\n", reflect.ValueOf(&p.Gs_base).Pointer()-reflect.ValueOf(p).Pointer()) -} diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD deleted file mode 100644 index 3b95af617..000000000 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ /dev/null @@ -1,105 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") - -go_template( - name = "generic_walker", - srcs = [ - "walker_amd64.go", - ], - opt_types = [ - "Visitor", - ], - visibility = [":__pkg__"], -) - -go_template_instance( - name = "walker_map", - out = "walker_map.go", - package = "pagetables", - prefix = "map", - template = ":generic_walker", - types = { - "Visitor": "mapVisitor", - }, -) - -go_template_instance( - name = "walker_unmap", - out = "walker_unmap.go", - package = "pagetables", - prefix = "unmap", - template = ":generic_walker", - types = { - "Visitor": "unmapVisitor", - }, -) - -go_template_instance( - name = "walker_lookup", - out = "walker_lookup.go", - package = "pagetables", - prefix = "lookup", - template = ":generic_walker", - types = { - "Visitor": "lookupVisitor", - }, -) - -go_template_instance( - name = "walker_empty", - out = "walker_empty.go", - package = "pagetables", - prefix = "empty", - template = ":generic_walker", - types = { - "Visitor": "emptyVisitor", - }, -) - -go_template_instance( - name = "walker_check", - out = "walker_check.go", - package = "pagetables", - prefix = "check", - template = ":generic_walker", - types = { - "Visitor": "checkVisitor", - }, -) - -go_library( - name = "pagetables", - srcs = [ - "allocator.go", - "allocator_unsafe.go", - "pagetables.go", - "pagetables_amd64.go", - "pagetables_x86.go", - "pcids_x86.go", - "walker_empty.go", - "walker_lookup.go", - "walker_map.go", - "walker_unmap.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables", - visibility = [ - "//pkg/sentry/platform/kvm:__subpackages__", - "//pkg/sentry/platform/ring0:__subpackages__", - ], - deps = ["//pkg/sentry/usermem"], -) - -go_test( - name = "pagetables_test", - size = "small", - srcs = [ - "pagetables_amd64_test.go", - "pagetables_test.go", - "walker_check.go", - ], - embed = [":pagetables"], - deps = ["//pkg/sentry/usermem"], -) diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go deleted file mode 100644 index 35e917526..000000000 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build amd64 - -package pagetables - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -func Test2MAnd4K(t *testing.T) { - pt := New(NewRuntimeAllocator()) - - // Map a small page and a huge page. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) - pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*47) - - checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, - {0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read}}, - }) -} - -func Test1GAnd4K(t *testing.T) { - pt := New(NewRuntimeAllocator()) - - // Map a small page and a super page. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) - pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*47) - - checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, - {0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read}}, - }) -} - -func TestSplit1GPage(t *testing.T) { - pt := New(NewRuntimeAllocator()) - - // Map a super page and knock out the middle. - pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*42) - pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pudSize-(2*pteSize)) - - checkMappings(t, pt, []mapping{ - {0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read}}, - {0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read}}, - }) -} - -func TestSplit2MPage(t *testing.T) { - pt := New(NewRuntimeAllocator()) - - // Map a huge page and knock out the middle. - pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*42) - pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pmdSize-(2*pteSize)) - - checkMappings(t, pt, []mapping{ - {0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read}}, - {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read}}, - }) -} diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_state_autogen.go b/pkg/sentry/platform/ring0/pagetables/pagetables_state_autogen.go new file mode 100755 index 000000000..ac1ccf3d3 --- /dev/null +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package pagetables + diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go deleted file mode 100644 index 6e95ad2b9..000000000 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pagetables - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -type mapping struct { - start uintptr - length uintptr - addr uintptr - opts MapOpts -} - -type checkVisitor struct { - expected []mapping // Input. - current int // Temporary. - found []mapping // Output. - failed string // Output. -} - -func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) { - v.found = append(v.found, mapping{ - start: start, - length: align + 1, - addr: pte.Address(), - opts: pte.Opts(), - }) - if v.failed != "" { - // Don't keep looking for errors. - return - } - - if v.current >= len(v.expected) { - v.failed = "more mappings than expected" - } else if v.expected[v.current].start != start { - v.failed = "start didn't match expected" - } else if v.expected[v.current].length != (align + 1) { - v.failed = "end didn't match expected" - } else if v.expected[v.current].addr != pte.Address() { - v.failed = "address didn't match expected" - } else if v.expected[v.current].opts != pte.Opts() { - v.failed = "opts didn't match" - } - v.current++ -} - -func (*checkVisitor) requiresAlloc() bool { return false } - -func (*checkVisitor) requiresSplit() bool { return false } - -func checkMappings(t *testing.T, pt *PageTables, m []mapping) { - // Iterate over all the mappings. - w := checkWalker{ - pageTables: pt, - visitor: checkVisitor{ - expected: m, - }, - } - w.iterateRange(0, ^uintptr(0)) - - // Were we expected additional mappings? - if w.visitor.failed == "" && w.visitor.current != len(w.visitor.expected) { - w.visitor.failed = "insufficient mappings found" - } - - // Emit a meaningful error message on failure. - if w.visitor.failed != "" { - t.Errorf("%s; got %#v, wanted %#v", w.visitor.failed, w.visitor.found, w.visitor.expected) - } -} - -func TestUnmap(t *testing.T) { - pt := New(NewRuntimeAllocator()) - - // Map and unmap one entry. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) - pt.Unmap(0x400000, pteSize) - - checkMappings(t, pt, nil) -} - -func TestReadOnly(t *testing.T) { - pt := New(NewRuntimeAllocator()) - - // Map one entry. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42) - - checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}}, - }) -} - -func TestReadWrite(t *testing.T) { - pt := New(NewRuntimeAllocator()) - - // Map one entry. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) - - checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, - }) -} - -func TestSerialEntries(t *testing.T) { - pt := New(NewRuntimeAllocator()) - - // Map two sequential entries. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) - pt.Map(0x401000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*47) - - checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, - {0x401000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.ReadWrite}}, - }) -} - -func TestSpanningEntries(t *testing.T) { - pt := New(NewRuntimeAllocator()) - - // Span a pgd with two pages. - pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42) - - checkMappings(t, pt, []mapping{ - {0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}}, - {0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: usermem.Read}}, - }) -} - -func TestSparseEntries(t *testing.T) { - pt := New(NewRuntimeAllocator()) - - // Map two entries in different pgds. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) - pt.Map(0x00007f0000000000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*47) - - checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, - {0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.Read}}, - }) -} diff --git a/pkg/sentry/platform/ring0/pagetables/walker_amd64.go b/pkg/sentry/platform/ring0/pagetables/walker_empty.go index 8f9dacd93..417784e17 100644..100755 --- a/pkg/sentry/platform/ring0/pagetables/walker_amd64.go +++ b/pkg/sentry/platform/ring0/pagetables/walker_empty.go @@ -1,42 +1,12 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build amd64 - package pagetables -// Visitor is a generic type. -type Visitor interface { - // visit is called on each PTE. - visit(start uintptr, pte *PTE, align uintptr) - - // requiresAlloc indicates that new entries should be allocated within - // the walked range. - requiresAlloc() bool - - // requiresSplit indicates that entries in the given range should be - // split if they are huge or jumbo pages. - requiresSplit() bool -} - // Walker walks page tables. -type Walker struct { +type emptyWalker struct { // pageTables are the tables to walk. pageTables *PageTables // Visitor is the set of arguments. - visitor Visitor + visitor emptyVisitor } // iterateRange iterates over all appropriate levels of page tables for the given range. @@ -63,7 +33,7 @@ type Walker struct { // non-canonical ranges. If they do, a panic will result. // //go:nosplit -func (w *Walker) iterateRange(start, end uintptr) { +func (w *emptyWalker) iterateRange(start, end uintptr) { if start%pteSize != 0 { panic("unaligned start") } @@ -104,7 +74,7 @@ func (w *Walker) iterateRange(start, end uintptr) { // next returns the next address quantized by the given size. // //go:nosplit -func next(start uintptr, size uintptr) uintptr { +func emptynext(start uintptr, size uintptr) uintptr { start &= ^(size - 1) start += size return start @@ -113,7 +83,7 @@ func next(start uintptr, size uintptr) uintptr { // iterateRangeCanonical walks a canonical range. // //go:nosplit -func (w *Walker) iterateRangeCanonical(start, end uintptr) { +func (w *emptyWalker) iterateRangeCanonical(start, end uintptr) { for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ { var ( pgdEntry = &w.pageTables.root[pgdIndex] @@ -121,19 +91,17 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { ) if !pgdEntry.Valid() { if !w.visitor.requiresAlloc() { - // Skip over this entry. - start = next(start, pgdSize) + + start = emptynext(start, pgdSize) continue } - // Allocate a new pgd. pudEntries = w.pageTables.Allocator.NewPTEs() pgdEntry.setPageTable(w.pageTables, pudEntries) } else { pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) } - // Map the next level. clearPUDEntries := uint16(0) for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ { @@ -143,33 +111,28 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { ) if !pudEntry.Valid() { if !w.visitor.requiresAlloc() { - // Skip over this entry. + clearPUDEntries++ - start = next(start, pudSize) + start = emptynext(start, pudSize) continue } - // This level has 1-GB super pages. Is this - // entire region at least as large as a single - // PUD entry? If so, we can skip allocating a - // new page for the pmd. if start&(pudSize-1) == 0 && end-start >= pudSize { pudEntry.SetSuper() w.visitor.visit(uintptr(start), pudEntry, pudSize-1) if pudEntry.Valid() { - start = next(start, pudSize) + start = emptynext(start, pudSize) continue } } - // Allocate a new pud. pmdEntries = w.pageTables.Allocator.NewPTEs() pudEntry.setPageTable(w.pageTables, pmdEntries) } else if pudEntry.IsSuper() { - // Does this page need to be split? - if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) { - // Install the relevant entries. + + if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < emptynext(start, pudSize)) { + pmdEntries = w.pageTables.Allocator.NewPTEs() for index := uint16(0); index < entriesPerPage; index++ { pmdEntries[index].SetSuper() @@ -179,23 +142,20 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } pudEntry.setPageTable(w.pageTables, pmdEntries) } else { - // A super page to be checked directly. + w.visitor.visit(uintptr(start), pudEntry, pudSize-1) - // Might have been cleared. if !pudEntry.Valid() { clearPUDEntries++ } - // Note that the super page was changed. - start = next(start, pudSize) + start = emptynext(start, pudSize) continue } } else { pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address()) } - // Map the next level, since this is valid. clearPMDEntries := uint16(0) for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ { @@ -205,32 +165,28 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { ) if !pmdEntry.Valid() { if !w.visitor.requiresAlloc() { - // Skip over this entry. + clearPMDEntries++ - start = next(start, pmdSize) + start = emptynext(start, pmdSize) continue } - // This level has 2-MB huge pages. If this - // region is contined in a single PMD entry? - // As above, we can skip allocating a new page. if start&(pmdSize-1) == 0 && end-start >= pmdSize { pmdEntry.SetSuper() w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) if pmdEntry.Valid() { - start = next(start, pmdSize) + start = emptynext(start, pmdSize) continue } } - // Allocate a new pmd. pteEntries = w.pageTables.Allocator.NewPTEs() pmdEntry.setPageTable(w.pageTables, pteEntries) } else if pmdEntry.IsSuper() { - // Does this page need to be split? - if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < next(start, pmdSize)) { - // Install the relevant entries. + + if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < emptynext(start, pmdSize)) { + pteEntries = w.pageTables.Allocator.NewPTEs() for index := uint16(0); index < entriesPerPage; index++ { pteEntries[index].Set( @@ -239,23 +195,20 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } pmdEntry.setPageTable(w.pageTables, pteEntries) } else { - // A huge page to be checked directly. + w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) - // Might have been cleared. if !pmdEntry.Valid() { clearPMDEntries++ } - // Note that the huge page was changed. - start = next(start, pmdSize) + start = emptynext(start, pmdSize) continue } } else { pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address()) } - // Map the next level, since this is valid. clearPTEEntries := uint16(0) for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ { @@ -268,7 +221,6 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { continue } - // At this point, we are guaranteed that start%pteSize == 0. w.visitor.visit(uintptr(start), pteEntry, pteSize-1) if !pteEntry.Valid() { if w.visitor.requiresAlloc() { @@ -277,12 +229,10 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { clearPTEEntries++ } - // Note that the pte was changed. start += pteSize continue } - // Check if we no longer need this page. if clearPTEEntries == entriesPerPage { pmdEntry.Clear() w.pageTables.Allocator.FreePTEs(pteEntries) @@ -290,7 +240,6 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } } - // Check if we no longer need this page. if clearPMDEntries == entriesPerPage { pudEntry.Clear() w.pageTables.Allocator.FreePTEs(pmdEntries) @@ -298,7 +247,6 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } } - // Check if we no longer need this page. if clearPUDEntries == entriesPerPage { pgdEntry.Clear() w.pageTables.Allocator.FreePTEs(pudEntries) diff --git a/pkg/sentry/platform/ring0/pagetables/walker_lookup.go b/pkg/sentry/platform/ring0/pagetables/walker_lookup.go new file mode 100755 index 000000000..906c9c50f --- /dev/null +++ b/pkg/sentry/platform/ring0/pagetables/walker_lookup.go @@ -0,0 +1,255 @@ +package pagetables + +// Walker walks page tables. +type lookupWalker struct { + // pageTables are the tables to walk. + pageTables *PageTables + + // Visitor is the set of arguments. + visitor lookupVisitor +} + +// iterateRange iterates over all appropriate levels of page tables for the given range. +// +// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The +// exception is super pages. If a valid super page (huge or jumbo) cannot be +// installed, then the walk will continue to individual entries. +// +// This algorithm will attempt to maximize the use of super pages whenever +// possible. Whether a super page is provided will be clear through the range +// provided in the callback. +// +// Note that if requiresAlloc is true, then no gaps will be present. However, +// if alloc is not set, then the iteration will likely be full of gaps. +// +// Note that this function should generally be avoided in favor of Map, Unmap, +// etc. when not necessary. +// +// Precondition: start must be page-aligned. +// +// Precondition: start must be less than end. +// +// Precondition: If requiresAlloc is true, then start and end should not span +// non-canonical ranges. If they do, a panic will result. +// +//go:nosplit +func (w *lookupWalker) iterateRange(start, end uintptr) { + if start%pteSize != 0 { + panic("unaligned start") + } + if end < start { + panic("start > end") + } + if start < lowerTop { + if end <= lowerTop { + w.iterateRangeCanonical(start, end) + } else if end > lowerTop && end <= upperBottom { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(start, lowerTop) + } else { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(start, lowerTop) + w.iterateRangeCanonical(upperBottom, end) + } + } else if start < upperBottom { + if end <= upperBottom { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + } else { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(upperBottom, end) + } + } else { + w.iterateRangeCanonical(start, end) + } +} + +// next returns the next address quantized by the given size. +// +//go:nosplit +func lookupnext(start uintptr, size uintptr) uintptr { + start &= ^(size - 1) + start += size + return start +} + +// iterateRangeCanonical walks a canonical range. +// +//go:nosplit +func (w *lookupWalker) iterateRangeCanonical(start, end uintptr) { + for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ { + var ( + pgdEntry = &w.pageTables.root[pgdIndex] + pudEntries *PTEs + ) + if !pgdEntry.Valid() { + if !w.visitor.requiresAlloc() { + + start = lookupnext(start, pgdSize) + continue + } + + pudEntries = w.pageTables.Allocator.NewPTEs() + pgdEntry.setPageTable(w.pageTables, pudEntries) + } else { + pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) + } + + clearPUDEntries := uint16(0) + + for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ { + var ( + pudEntry = &pudEntries[pudIndex] + pmdEntries *PTEs + ) + if !pudEntry.Valid() { + if !w.visitor.requiresAlloc() { + + clearPUDEntries++ + start = lookupnext(start, pudSize) + continue + } + + if start&(pudSize-1) == 0 && end-start >= pudSize { + pudEntry.SetSuper() + w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if pudEntry.Valid() { + start = lookupnext(start, pudSize) + continue + } + } + + pmdEntries = w.pageTables.Allocator.NewPTEs() + pudEntry.setPageTable(w.pageTables, pmdEntries) + + } else if pudEntry.IsSuper() { + + if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < lookupnext(start, pudSize)) { + + pmdEntries = w.pageTables.Allocator.NewPTEs() + for index := uint16(0); index < entriesPerPage; index++ { + pmdEntries[index].SetSuper() + pmdEntries[index].Set( + pudEntry.Address()+(pmdSize*uintptr(index)), + pudEntry.Opts()) + } + pudEntry.setPageTable(w.pageTables, pmdEntries) + } else { + + w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + + if !pudEntry.Valid() { + clearPUDEntries++ + } + + start = lookupnext(start, pudSize) + continue + } + } else { + pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address()) + } + + clearPMDEntries := uint16(0) + + for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ { + var ( + pmdEntry = &pmdEntries[pmdIndex] + pteEntries *PTEs + ) + if !pmdEntry.Valid() { + if !w.visitor.requiresAlloc() { + + clearPMDEntries++ + start = lookupnext(start, pmdSize) + continue + } + + if start&(pmdSize-1) == 0 && end-start >= pmdSize { + pmdEntry.SetSuper() + w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if pmdEntry.Valid() { + start = lookupnext(start, pmdSize) + continue + } + } + + pteEntries = w.pageTables.Allocator.NewPTEs() + pmdEntry.setPageTable(w.pageTables, pteEntries) + + } else if pmdEntry.IsSuper() { + + if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < lookupnext(start, pmdSize)) { + + pteEntries = w.pageTables.Allocator.NewPTEs() + for index := uint16(0); index < entriesPerPage; index++ { + pteEntries[index].Set( + pmdEntry.Address()+(pteSize*uintptr(index)), + pmdEntry.Opts()) + } + pmdEntry.setPageTable(w.pageTables, pteEntries) + } else { + + w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + + if !pmdEntry.Valid() { + clearPMDEntries++ + } + + start = lookupnext(start, pmdSize) + continue + } + } else { + pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address()) + } + + clearPTEEntries := uint16(0) + + for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ { + var ( + pteEntry = &pteEntries[pteIndex] + ) + if !pteEntry.Valid() && !w.visitor.requiresAlloc() { + clearPTEEntries++ + start += pteSize + continue + } + + w.visitor.visit(uintptr(start), pteEntry, pteSize-1) + if !pteEntry.Valid() { + if w.visitor.requiresAlloc() { + panic("PTE not set after iteration with requiresAlloc!") + } + clearPTEEntries++ + } + + start += pteSize + continue + } + + if clearPTEEntries == entriesPerPage { + pmdEntry.Clear() + w.pageTables.Allocator.FreePTEs(pteEntries) + clearPMDEntries++ + } + } + + if clearPMDEntries == entriesPerPage { + pudEntry.Clear() + w.pageTables.Allocator.FreePTEs(pmdEntries) + clearPUDEntries++ + } + } + + if clearPUDEntries == entriesPerPage { + pgdEntry.Clear() + w.pageTables.Allocator.FreePTEs(pudEntries) + } + } +} diff --git a/pkg/sentry/platform/ring0/pagetables/walker_map.go b/pkg/sentry/platform/ring0/pagetables/walker_map.go new file mode 100755 index 000000000..61ee3c825 --- /dev/null +++ b/pkg/sentry/platform/ring0/pagetables/walker_map.go @@ -0,0 +1,255 @@ +package pagetables + +// Walker walks page tables. +type mapWalker struct { + // pageTables are the tables to walk. + pageTables *PageTables + + // Visitor is the set of arguments. + visitor mapVisitor +} + +// iterateRange iterates over all appropriate levels of page tables for the given range. +// +// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The +// exception is super pages. If a valid super page (huge or jumbo) cannot be +// installed, then the walk will continue to individual entries. +// +// This algorithm will attempt to maximize the use of super pages whenever +// possible. Whether a super page is provided will be clear through the range +// provided in the callback. +// +// Note that if requiresAlloc is true, then no gaps will be present. However, +// if alloc is not set, then the iteration will likely be full of gaps. +// +// Note that this function should generally be avoided in favor of Map, Unmap, +// etc. when not necessary. +// +// Precondition: start must be page-aligned. +// +// Precondition: start must be less than end. +// +// Precondition: If requiresAlloc is true, then start and end should not span +// non-canonical ranges. If they do, a panic will result. +// +//go:nosplit +func (w *mapWalker) iterateRange(start, end uintptr) { + if start%pteSize != 0 { + panic("unaligned start") + } + if end < start { + panic("start > end") + } + if start < lowerTop { + if end <= lowerTop { + w.iterateRangeCanonical(start, end) + } else if end > lowerTop && end <= upperBottom { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(start, lowerTop) + } else { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(start, lowerTop) + w.iterateRangeCanonical(upperBottom, end) + } + } else if start < upperBottom { + if end <= upperBottom { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + } else { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(upperBottom, end) + } + } else { + w.iterateRangeCanonical(start, end) + } +} + +// next returns the next address quantized by the given size. +// +//go:nosplit +func mapnext(start uintptr, size uintptr) uintptr { + start &= ^(size - 1) + start += size + return start +} + +// iterateRangeCanonical walks a canonical range. +// +//go:nosplit +func (w *mapWalker) iterateRangeCanonical(start, end uintptr) { + for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ { + var ( + pgdEntry = &w.pageTables.root[pgdIndex] + pudEntries *PTEs + ) + if !pgdEntry.Valid() { + if !w.visitor.requiresAlloc() { + + start = mapnext(start, pgdSize) + continue + } + + pudEntries = w.pageTables.Allocator.NewPTEs() + pgdEntry.setPageTable(w.pageTables, pudEntries) + } else { + pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) + } + + clearPUDEntries := uint16(0) + + for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ { + var ( + pudEntry = &pudEntries[pudIndex] + pmdEntries *PTEs + ) + if !pudEntry.Valid() { + if !w.visitor.requiresAlloc() { + + clearPUDEntries++ + start = mapnext(start, pudSize) + continue + } + + if start&(pudSize-1) == 0 && end-start >= pudSize { + pudEntry.SetSuper() + w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if pudEntry.Valid() { + start = mapnext(start, pudSize) + continue + } + } + + pmdEntries = w.pageTables.Allocator.NewPTEs() + pudEntry.setPageTable(w.pageTables, pmdEntries) + + } else if pudEntry.IsSuper() { + + if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < mapnext(start, pudSize)) { + + pmdEntries = w.pageTables.Allocator.NewPTEs() + for index := uint16(0); index < entriesPerPage; index++ { + pmdEntries[index].SetSuper() + pmdEntries[index].Set( + pudEntry.Address()+(pmdSize*uintptr(index)), + pudEntry.Opts()) + } + pudEntry.setPageTable(w.pageTables, pmdEntries) + } else { + + w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + + if !pudEntry.Valid() { + clearPUDEntries++ + } + + start = mapnext(start, pudSize) + continue + } + } else { + pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address()) + } + + clearPMDEntries := uint16(0) + + for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ { + var ( + pmdEntry = &pmdEntries[pmdIndex] + pteEntries *PTEs + ) + if !pmdEntry.Valid() { + if !w.visitor.requiresAlloc() { + + clearPMDEntries++ + start = mapnext(start, pmdSize) + continue + } + + if start&(pmdSize-1) == 0 && end-start >= pmdSize { + pmdEntry.SetSuper() + w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if pmdEntry.Valid() { + start = mapnext(start, pmdSize) + continue + } + } + + pteEntries = w.pageTables.Allocator.NewPTEs() + pmdEntry.setPageTable(w.pageTables, pteEntries) + + } else if pmdEntry.IsSuper() { + + if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < mapnext(start, pmdSize)) { + + pteEntries = w.pageTables.Allocator.NewPTEs() + for index := uint16(0); index < entriesPerPage; index++ { + pteEntries[index].Set( + pmdEntry.Address()+(pteSize*uintptr(index)), + pmdEntry.Opts()) + } + pmdEntry.setPageTable(w.pageTables, pteEntries) + } else { + + w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + + if !pmdEntry.Valid() { + clearPMDEntries++ + } + + start = mapnext(start, pmdSize) + continue + } + } else { + pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address()) + } + + clearPTEEntries := uint16(0) + + for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ { + var ( + pteEntry = &pteEntries[pteIndex] + ) + if !pteEntry.Valid() && !w.visitor.requiresAlloc() { + clearPTEEntries++ + start += pteSize + continue + } + + w.visitor.visit(uintptr(start), pteEntry, pteSize-1) + if !pteEntry.Valid() { + if w.visitor.requiresAlloc() { + panic("PTE not set after iteration with requiresAlloc!") + } + clearPTEEntries++ + } + + start += pteSize + continue + } + + if clearPTEEntries == entriesPerPage { + pmdEntry.Clear() + w.pageTables.Allocator.FreePTEs(pteEntries) + clearPMDEntries++ + } + } + + if clearPMDEntries == entriesPerPage { + pudEntry.Clear() + w.pageTables.Allocator.FreePTEs(pmdEntries) + clearPUDEntries++ + } + } + + if clearPUDEntries == entriesPerPage { + pgdEntry.Clear() + w.pageTables.Allocator.FreePTEs(pudEntries) + } + } +} diff --git a/pkg/sentry/platform/ring0/pagetables/walker_unmap.go b/pkg/sentry/platform/ring0/pagetables/walker_unmap.go new file mode 100755 index 000000000..be2aa0ce4 --- /dev/null +++ b/pkg/sentry/platform/ring0/pagetables/walker_unmap.go @@ -0,0 +1,255 @@ +package pagetables + +// Walker walks page tables. +type unmapWalker struct { + // pageTables are the tables to walk. + pageTables *PageTables + + // Visitor is the set of arguments. + visitor unmapVisitor +} + +// iterateRange iterates over all appropriate levels of page tables for the given range. +// +// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The +// exception is super pages. If a valid super page (huge or jumbo) cannot be +// installed, then the walk will continue to individual entries. +// +// This algorithm will attempt to maximize the use of super pages whenever +// possible. Whether a super page is provided will be clear through the range +// provided in the callback. +// +// Note that if requiresAlloc is true, then no gaps will be present. However, +// if alloc is not set, then the iteration will likely be full of gaps. +// +// Note that this function should generally be avoided in favor of Map, Unmap, +// etc. when not necessary. +// +// Precondition: start must be page-aligned. +// +// Precondition: start must be less than end. +// +// Precondition: If requiresAlloc is true, then start and end should not span +// non-canonical ranges. If they do, a panic will result. +// +//go:nosplit +func (w *unmapWalker) iterateRange(start, end uintptr) { + if start%pteSize != 0 { + panic("unaligned start") + } + if end < start { + panic("start > end") + } + if start < lowerTop { + if end <= lowerTop { + w.iterateRangeCanonical(start, end) + } else if end > lowerTop && end <= upperBottom { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(start, lowerTop) + } else { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(start, lowerTop) + w.iterateRangeCanonical(upperBottom, end) + } + } else if start < upperBottom { + if end <= upperBottom { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + } else { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(upperBottom, end) + } + } else { + w.iterateRangeCanonical(start, end) + } +} + +// next returns the next address quantized by the given size. +// +//go:nosplit +func unmapnext(start uintptr, size uintptr) uintptr { + start &= ^(size - 1) + start += size + return start +} + +// iterateRangeCanonical walks a canonical range. +// +//go:nosplit +func (w *unmapWalker) iterateRangeCanonical(start, end uintptr) { + for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ { + var ( + pgdEntry = &w.pageTables.root[pgdIndex] + pudEntries *PTEs + ) + if !pgdEntry.Valid() { + if !w.visitor.requiresAlloc() { + + start = unmapnext(start, pgdSize) + continue + } + + pudEntries = w.pageTables.Allocator.NewPTEs() + pgdEntry.setPageTable(w.pageTables, pudEntries) + } else { + pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) + } + + clearPUDEntries := uint16(0) + + for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ { + var ( + pudEntry = &pudEntries[pudIndex] + pmdEntries *PTEs + ) + if !pudEntry.Valid() { + if !w.visitor.requiresAlloc() { + + clearPUDEntries++ + start = unmapnext(start, pudSize) + continue + } + + if start&(pudSize-1) == 0 && end-start >= pudSize { + pudEntry.SetSuper() + w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if pudEntry.Valid() { + start = unmapnext(start, pudSize) + continue + } + } + + pmdEntries = w.pageTables.Allocator.NewPTEs() + pudEntry.setPageTable(w.pageTables, pmdEntries) + + } else if pudEntry.IsSuper() { + + if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < unmapnext(start, pudSize)) { + + pmdEntries = w.pageTables.Allocator.NewPTEs() + for index := uint16(0); index < entriesPerPage; index++ { + pmdEntries[index].SetSuper() + pmdEntries[index].Set( + pudEntry.Address()+(pmdSize*uintptr(index)), + pudEntry.Opts()) + } + pudEntry.setPageTable(w.pageTables, pmdEntries) + } else { + + w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + + if !pudEntry.Valid() { + clearPUDEntries++ + } + + start = unmapnext(start, pudSize) + continue + } + } else { + pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address()) + } + + clearPMDEntries := uint16(0) + + for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ { + var ( + pmdEntry = &pmdEntries[pmdIndex] + pteEntries *PTEs + ) + if !pmdEntry.Valid() { + if !w.visitor.requiresAlloc() { + + clearPMDEntries++ + start = unmapnext(start, pmdSize) + continue + } + + if start&(pmdSize-1) == 0 && end-start >= pmdSize { + pmdEntry.SetSuper() + w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if pmdEntry.Valid() { + start = unmapnext(start, pmdSize) + continue + } + } + + pteEntries = w.pageTables.Allocator.NewPTEs() + pmdEntry.setPageTable(w.pageTables, pteEntries) + + } else if pmdEntry.IsSuper() { + + if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < unmapnext(start, pmdSize)) { + + pteEntries = w.pageTables.Allocator.NewPTEs() + for index := uint16(0); index < entriesPerPage; index++ { + pteEntries[index].Set( + pmdEntry.Address()+(pteSize*uintptr(index)), + pmdEntry.Opts()) + } + pmdEntry.setPageTable(w.pageTables, pteEntries) + } else { + + w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + + if !pmdEntry.Valid() { + clearPMDEntries++ + } + + start = unmapnext(start, pmdSize) + continue + } + } else { + pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address()) + } + + clearPTEEntries := uint16(0) + + for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ { + var ( + pteEntry = &pteEntries[pteIndex] + ) + if !pteEntry.Valid() && !w.visitor.requiresAlloc() { + clearPTEEntries++ + start += pteSize + continue + } + + w.visitor.visit(uintptr(start), pteEntry, pteSize-1) + if !pteEntry.Valid() { + if w.visitor.requiresAlloc() { + panic("PTE not set after iteration with requiresAlloc!") + } + clearPTEEntries++ + } + + start += pteSize + continue + } + + if clearPTEEntries == entriesPerPage { + pmdEntry.Clear() + w.pageTables.Allocator.FreePTEs(pteEntries) + clearPMDEntries++ + } + } + + if clearPMDEntries == entriesPerPage { + pudEntry.Clear() + w.pageTables.Allocator.FreePTEs(pmdEntries) + clearPUDEntries++ + } + } + + if clearPUDEntries == entriesPerPage { + pgdEntry.Clear() + w.pageTables.Allocator.FreePTEs(pudEntries) + } + } +} diff --git a/pkg/sentry/platform/ring0/ring0_state_autogen.go b/pkg/sentry/platform/ring0/ring0_state_autogen.go new file mode 100755 index 000000000..462f9a446 --- /dev/null +++ b/pkg/sentry/platform/ring0/ring0_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package ring0 + diff --git a/pkg/sentry/platform/ring0/x86.go b/pkg/sentry/platform/ring0/x86.go deleted file mode 100644 index 5f80d64e8..000000000 --- a/pkg/sentry/platform/ring0/x86.go +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build i386 amd64 - -package ring0 - -import ( - "gvisor.dev/gvisor/pkg/cpuid" -) - -// Useful bits. -const ( - _CR0_PE = 1 << 0 - _CR0_ET = 1 << 4 - _CR0_AM = 1 << 18 - _CR0_PG = 1 << 31 - - _CR4_PSE = 1 << 4 - _CR4_PAE = 1 << 5 - _CR4_PGE = 1 << 7 - _CR4_OSFXSR = 1 << 9 - _CR4_OSXMMEXCPT = 1 << 10 - _CR4_FSGSBASE = 1 << 16 - _CR4_PCIDE = 1 << 17 - _CR4_OSXSAVE = 1 << 18 - _CR4_SMEP = 1 << 20 - - _RFLAGS_AC = 1 << 18 - _RFLAGS_NT = 1 << 14 - _RFLAGS_IOPL = 3 << 12 - _RFLAGS_DF = 1 << 10 - _RFLAGS_IF = 1 << 9 - _RFLAGS_STEP = 1 << 8 - _RFLAGS_RESERVED = 1 << 1 - - _EFER_SCE = 0x001 - _EFER_LME = 0x100 - _EFER_LMA = 0x400 - _EFER_NX = 0x800 - - _MSR_STAR = 0xc0000081 - _MSR_LSTAR = 0xc0000082 - _MSR_CSTAR = 0xc0000083 - _MSR_SYSCALL_MASK = 0xc0000084 - _MSR_PLATFORM_INFO = 0xce - _MSR_MISC_FEATURES = 0x140 - - _PLATFORM_INFO_CPUID_FAULT = 1 << 31 - - _MISC_FEATURE_CPUID_TRAP = 0x1 -) - -const ( - // KernelFlagsSet should always be set in the kernel. - KernelFlagsSet = _RFLAGS_RESERVED - - // UserFlagsSet are always set in userspace. - UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF - - // KernelFlagsClear should always be clear in the kernel. - KernelFlagsClear = _RFLAGS_STEP | _RFLAGS_IF | _RFLAGS_IOPL | _RFLAGS_AC | _RFLAGS_NT - - // UserFlagsClear are always cleared in userspace. - UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL -) - -// Vector is an exception vector. -type Vector uintptr - -// Exception vectors. -const ( - DivideByZero Vector = iota - Debug - NMI - Breakpoint - Overflow - BoundRangeExceeded - InvalidOpcode - DeviceNotAvailable - DoubleFault - CoprocessorSegmentOverrun - InvalidTSS - SegmentNotPresent - StackSegmentFault - GeneralProtectionFault - PageFault - _ - X87FloatingPointException - AlignmentCheck - MachineCheck - SIMDFloatingPointException - VirtualizationException - SecurityException = 0x1e - SyscallInt80 = 0x80 - _NR_INTERRUPTS = SyscallInt80 + 1 -) - -// System call vectors. -const ( - Syscall Vector = _NR_INTERRUPTS -) - -// VirtualAddressBits returns the number bits available for virtual addresses. -// -// Note that sign-extension semantics apply to the highest order bit. -// -// FIXME(b/69382326): This should use the cpuid passed to Init. -func VirtualAddressBits() uint32 { - ax, _, _, _ := cpuid.HostID(0x80000008, 0) - return (ax >> 8) & 0xff -} - -// PhysicalAddressBits returns the number of bits available for physical addresses. -// -// FIXME(b/69382326): This should use the cpuid passed to Init. -func PhysicalAddressBits() uint32 { - ax, _, _, _ := cpuid.HostID(0x80000008, 0) - return ax & 0xff -} - -// Selector is a segment Selector. -type Selector uint16 - -// SegmentDescriptor is a segment descriptor. -type SegmentDescriptor struct { - bits [2]uint32 -} - -// descriptorTable is a collection of descriptors. -type descriptorTable [32]SegmentDescriptor - -// SegmentDescriptorFlags are typed flags within a descriptor. -type SegmentDescriptorFlags uint32 - -// SegmentDescriptorFlag declarations. -const ( - SegmentDescriptorAccess SegmentDescriptorFlags = 1 << 8 // Access bit (always set). - SegmentDescriptorWrite = 1 << 9 // Write permission. - SegmentDescriptorExpandDown = 1 << 10 // Grows down, not used. - SegmentDescriptorExecute = 1 << 11 // Execute permission. - SegmentDescriptorSystem = 1 << 12 // Zero => system, 1 => user code/data. - SegmentDescriptorPresent = 1 << 15 // Present. - SegmentDescriptorAVL = 1 << 20 // Available. - SegmentDescriptorLong = 1 << 21 // Long mode. - SegmentDescriptorDB = 1 << 22 // 16 or 32-bit. - SegmentDescriptorG = 1 << 23 // Granularity: page or byte. -) - -// Base returns the descriptor's base linear address. -func (d *SegmentDescriptor) Base() uint32 { - return d.bits[1]&0xFF000000 | (d.bits[1]&0x000000FF)<<16 | d.bits[0]>>16 -} - -// Limit returns the descriptor size. -func (d *SegmentDescriptor) Limit() uint32 { - l := d.bits[0]&0xFFFF | d.bits[1]&0xF0000 - if d.bits[1]&uint32(SegmentDescriptorG) != 0 { - l <<= 12 - l |= 0xFFF - } - return l -} - -// Flags returns descriptor flags. -func (d *SegmentDescriptor) Flags() SegmentDescriptorFlags { - return SegmentDescriptorFlags(d.bits[1] & 0x00F09F00) -} - -// DPL returns the descriptor privilege level. -func (d *SegmentDescriptor) DPL() int { - return int((d.bits[1] >> 13) & 3) -} - -func (d *SegmentDescriptor) setNull() { - d.bits[0] = 0 - d.bits[1] = 0 -} - -func (d *SegmentDescriptor) set(base, limit uint32, dpl int, flags SegmentDescriptorFlags) { - flags |= SegmentDescriptorPresent - if limit>>12 != 0 { - limit >>= 12 - flags |= SegmentDescriptorG - } - d.bits[0] = base<<16 | limit&0xFFFF - d.bits[1] = base&0xFF000000 | (base>>16)&0xFF | limit&0x000F0000 | uint32(flags) | uint32(dpl)<<13 -} - -func (d *SegmentDescriptor) setCode32(base, limit uint32, dpl int) { - d.set(base, limit, dpl, - SegmentDescriptorDB| - SegmentDescriptorExecute| - SegmentDescriptorSystem) -} - -func (d *SegmentDescriptor) setCode64(base, limit uint32, dpl int) { - d.set(base, limit, dpl, - SegmentDescriptorG| - SegmentDescriptorLong| - SegmentDescriptorExecute| - SegmentDescriptorSystem) -} - -func (d *SegmentDescriptor) setData(base, limit uint32, dpl int) { - d.set(base, limit, dpl, - SegmentDescriptorWrite| - SegmentDescriptorSystem) -} - -// setHi is only used for the TSS segment, which is magically 64-bits. -func (d *SegmentDescriptor) setHi(base uint32) { - d.bits[0] = base - d.bits[1] = 0 -} - -// Gate64 is a 64-bit task, trap, or interrupt gate. -type Gate64 struct { - bits [4]uint32 -} - -// idt64 is a 64-bit interrupt descriptor table. -type idt64 [_NR_INTERRUPTS]Gate64 - -func (g *Gate64) setInterrupt(cs Selector, rip uint64, dpl int, ist int) { - g.bits[0] = uint32(cs)<<16 | uint32(rip)&0xFFFF - g.bits[1] = uint32(rip)&0xFFFF0000 | SegmentDescriptorPresent | uint32(dpl)<<13 | 14<<8 | uint32(ist)&0x7 - g.bits[2] = uint32(rip >> 32) -} - -func (g *Gate64) setTrap(cs Selector, rip uint64, dpl int, ist int) { - g.setInterrupt(cs, rip, dpl, ist) - g.bits[1] |= 1 << 8 -} - -// TaskState64 is a 64-bit task state structure. -type TaskState64 struct { - _ uint32 - rsp0Lo, rsp0Hi uint32 - rsp1Lo, rsp1Hi uint32 - rsp2Lo, rsp2Hi uint32 - _ [2]uint32 - ist1Lo, ist1Hi uint32 - ist2Lo, ist2Hi uint32 - ist3Lo, ist3Hi uint32 - ist4Lo, ist4Hi uint32 - ist5Lo, ist5Hi uint32 - ist6Lo, ist6Hi uint32 - ist7Lo, ist7Hi uint32 - _ [2]uint32 - _ uint16 - ioPerm uint16 -} diff --git a/pkg/sentry/platform/safecopy/BUILD b/pkg/sentry/platform/safecopy/BUILD deleted file mode 100644 index 924d8a6d6..000000000 --- a/pkg/sentry/platform/safecopy/BUILD +++ /dev/null @@ -1,30 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "safecopy", - srcs = [ - "atomic_amd64.s", - "atomic_arm64.s", - "memclr_amd64.s", - "memclr_arm64.s", - "memcpy_amd64.s", - "memcpy_arm64.s", - "safecopy.go", - "safecopy_unsafe.go", - "sighandler_amd64.s", - "sighandler_arm64.s", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/safecopy", - visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/syserror"], -) - -go_test( - name = "safecopy_test", - srcs = [ - "safecopy_test.go", - ], - embed = [":safecopy"], -) diff --git a/pkg/sentry/platform/safecopy/LICENSE b/pkg/sentry/platform/safecopy/LICENSE deleted file mode 100644 index 6a66aea5e..000000000 --- a/pkg/sentry/platform/safecopy/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pkg/sentry/platform/safecopy/safecopy_state_autogen.go b/pkg/sentry/platform/safecopy/safecopy_state_autogen.go new file mode 100755 index 000000000..58fd8fbd0 --- /dev/null +++ b/pkg/sentry/platform/safecopy/safecopy_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package safecopy + diff --git a/pkg/sentry/platform/safecopy/safecopy_test.go b/pkg/sentry/platform/safecopy/safecopy_test.go deleted file mode 100644 index 5818f7f9b..000000000 --- a/pkg/sentry/platform/safecopy/safecopy_test.go +++ /dev/null @@ -1,617 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package safecopy - -import ( - "bytes" - "fmt" - "io/ioutil" - "math/rand" - "os" - "runtime/debug" - "syscall" - "testing" - "unsafe" -) - -// Size of a page in bytes. Cloned from usermem.PageSize to avoid a circular -// dependency. -const pageSize = 4096 - -func initRandom(b []byte) { - for i := range b { - b[i] = byte(rand.Intn(256)) - } -} - -func randBuf(size int) []byte { - b := make([]byte, size) - initRandom(b) - return b -} - -func TestCopyInSuccess(t *testing.T) { - // Test that CopyIn does not return an error when all pages are accessible. - const bufLen = 8192 - a := randBuf(bufLen) - b := make([]byte, bufLen) - - n, err := CopyIn(b, unsafe.Pointer(&a[0])) - if n != bufLen { - t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) - } - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !bytes.Equal(a, b) { - t.Errorf("Buffers are not equal when they should be: %v %v", a, b) - } -} - -func TestCopyOutSuccess(t *testing.T) { - // Test that CopyOut does not return an error when all pages are - // accessible. - const bufLen = 8192 - a := randBuf(bufLen) - b := make([]byte, bufLen) - - n, err := CopyOut(unsafe.Pointer(&b[0]), a) - if n != bufLen { - t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) - } - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !bytes.Equal(a, b) { - t.Errorf("Buffers are not equal when they should be: %v %v", a, b) - } -} - -func TestCopySuccess(t *testing.T) { - // Test that Copy does not return an error when all pages are accessible. - const bufLen = 8192 - a := randBuf(bufLen) - b := make([]byte, bufLen) - - n, err := Copy(unsafe.Pointer(&b[0]), unsafe.Pointer(&a[0]), bufLen) - if n != bufLen { - t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) - } - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !bytes.Equal(a, b) { - t.Errorf("Buffers are not equal when they should be: %v %v", a, b) - } -} - -func TestZeroOutSuccess(t *testing.T) { - // Test that ZeroOut does not return an error when all pages are - // accessible. - const bufLen = 8192 - a := make([]byte, bufLen) - b := randBuf(bufLen) - - n, err := ZeroOut(unsafe.Pointer(&b[0]), bufLen) - if n != bufLen { - t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) - } - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !bytes.Equal(a, b) { - t.Errorf("Buffers are not equal when they should be: %v %v", a, b) - } -} - -func TestSwapUint32Success(t *testing.T) { - // Test that SwapUint32 does not return an error when the page is - // accessible. - before := uint32(rand.Int31()) - after := uint32(rand.Int31()) - val := before - - old, err := SwapUint32(unsafe.Pointer(&val), after) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if old != before { - t.Errorf("Unexpected old value: got %v, want %v", old, before) - } - if val != after { - t.Errorf("Unexpected new value: got %v, want %v", val, after) - } -} - -func TestSwapUint32AlignmentError(t *testing.T) { - // Test that SwapUint32 returns an AlignmentError when passed an unaligned - // address. - data := new(struct{ val uint64 }) - addr := uintptr(unsafe.Pointer(&data.val)) + 1 - want := AlignmentError{Addr: addr, Alignment: 4} - if _, err := SwapUint32(unsafe.Pointer(addr), 1); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } -} - -func TestSwapUint64Success(t *testing.T) { - // Test that SwapUint64 does not return an error when the page is - // accessible. - before := uint64(rand.Int63()) - after := uint64(rand.Int63()) - // "The first word in ... an allocated struct or slice can be relied upon - // to be 64-bit aligned." - sync/atomic docs - data := new(struct{ val uint64 }) - data.val = before - - old, err := SwapUint64(unsafe.Pointer(&data.val), after) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if old != before { - t.Errorf("Unexpected old value: got %v, want %v", old, before) - } - if data.val != after { - t.Errorf("Unexpected new value: got %v, want %v", data.val, after) - } -} - -func TestSwapUint64AlignmentError(t *testing.T) { - // Test that SwapUint64 returns an AlignmentError when passed an unaligned - // address. - data := new(struct{ val1, val2 uint64 }) - addr := uintptr(unsafe.Pointer(&data.val1)) + 1 - want := AlignmentError{Addr: addr, Alignment: 8} - if _, err := SwapUint64(unsafe.Pointer(addr), 1); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } -} - -func TestCompareAndSwapUint32Success(t *testing.T) { - // Test that CompareAndSwapUint32 does not return an error when the page is - // accessible. - before := uint32(rand.Int31()) - after := uint32(rand.Int31()) - val := before - - old, err := CompareAndSwapUint32(unsafe.Pointer(&val), before, after) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if old != before { - t.Errorf("Unexpected old value: got %v, want %v", old, before) - } - if val != after { - t.Errorf("Unexpected new value: got %v, want %v", val, after) - } -} - -func TestCompareAndSwapUint32AlignmentError(t *testing.T) { - // Test that CompareAndSwapUint32 returns an AlignmentError when passed an - // unaligned address. - data := new(struct{ val uint64 }) - addr := uintptr(unsafe.Pointer(&data.val)) + 1 - want := AlignmentError{Addr: addr, Alignment: 4} - if _, err := CompareAndSwapUint32(unsafe.Pointer(addr), 0, 1); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } -} - -// withSegvErrorTestMapping calls fn with a two-page mapping. The first page -// contains random data, and the second page generates SIGSEGV when accessed. -func withSegvErrorTestMapping(t *testing.T, fn func(m []byte)) { - mapping, err := syscall.Mmap(-1, 0, 2*pageSize, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_ANONYMOUS|syscall.MAP_PRIVATE) - if err != nil { - t.Fatalf("Mmap failed: %v", err) - } - defer syscall.Munmap(mapping) - if err := syscall.Mprotect(mapping[pageSize:], syscall.PROT_NONE); err != nil { - t.Fatalf("Mprotect failed: %v", err) - } - initRandom(mapping[:pageSize]) - - fn(mapping) -} - -// withBusErrorTestMapping calls fn with a two-page mapping. The first page -// contains random data, and the second page generates SIGBUS when accessed. -func withBusErrorTestMapping(t *testing.T, fn func(m []byte)) { - f, err := ioutil.TempFile("", "sigbus_test") - if err != nil { - t.Fatalf("TempFile failed: %v", err) - } - defer f.Close() - if err := f.Truncate(pageSize); err != nil { - t.Fatalf("Truncate failed: %v", err) - } - mapping, err := syscall.Mmap(int(f.Fd()), 0, 2*pageSize, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) - if err != nil { - t.Fatalf("Mmap failed: %v", err) - } - defer syscall.Munmap(mapping) - initRandom(mapping[:pageSize]) - - fn(mapping) -} - -func TestCopyInSegvError(t *testing.T) { - // Test that CopyIn returns a SegvError when reaching a page that signals - // SIGSEGV. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - dst := randBuf(pageSize) - n, err := CopyIn(dst, src) - if n != bytesBeforeFault { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopyInBusError(t *testing.T) { - // Test that CopyIn returns a BusError when reaching a page that signals - // SIGBUS. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - dst := randBuf(pageSize) - n, err := CopyIn(dst, src) - if n != bytesBeforeFault { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopyOutSegvError(t *testing.T) { - // Test that CopyOut returns a SegvError when reaching a page that signals - // SIGSEGV. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - src := randBuf(pageSize) - n, err := CopyOut(dst, src) - if n != bytesBeforeFault { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopyOutBusError(t *testing.T) { - // Test that CopyOut returns a BusError when reaching a page that signals - // SIGBUS. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - src := randBuf(pageSize) - n, err := CopyOut(dst, src) - if n != bytesBeforeFault { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopySourceSegvError(t *testing.T) { - // Test that Copy returns a SegvError when copying from a page that signals - // SIGSEGV. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - dst := randBuf(pageSize) - n, err := Copy(unsafe.Pointer(&dst[0]), src, pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopySourceBusError(t *testing.T) { - // Test that Copy returns a BusError when copying from a page that signals - // SIGBUS. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - dst := randBuf(pageSize) - n, err := Copy(unsafe.Pointer(&dst[0]), src, pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopyDestinationSegvError(t *testing.T) { - // Test that Copy returns a SegvError when copying to a page that signals - // SIGSEGV. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - src := randBuf(pageSize) - n, err := Copy(dst, unsafe.Pointer(&src[0]), pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopyDestinationBusError(t *testing.T) { - // Test that Copy returns a BusError when copying to a page that signals - // SIGBUS. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - src := randBuf(pageSize) - n, err := Copy(dst, unsafe.Pointer(&src[0]), pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestZeroOutSegvError(t *testing.T) { - // Test that ZeroOut returns a SegvError when reaching a page that signals - // SIGSEGV. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting write %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - n, err := ZeroOut(dst, pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected write length: got %v, want %v", n, bytesBeforeFault) - } - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], make([]byte, bytesBeforeFault); !bytes.Equal(got, want) { - t.Errorf("Non-zero bytes in written part of mapping: %v", got) - } - }) - }) - } -} - -func TestZeroOutBusError(t *testing.T) { - // Test that ZeroOut returns a BusError when reaching a page that signals - // SIGBUS. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting write %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - n, err := ZeroOut(dst, pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected write length: got %v, want %v", n, bytesBeforeFault) - } - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], make([]byte, bytesBeforeFault); !bytes.Equal(got, want) { - t.Errorf("Non-zero bytes in written part of mapping: %v", got) - } - }) - }) - } -} - -func TestSwapUint32SegvError(t *testing.T) { - // Test that SwapUint32 returns a SegvError when reaching a page that - // signals SIGSEGV. - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := SwapUint32(unsafe.Pointer(secondPage), 1) - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func TestSwapUint32BusError(t *testing.T) { - // Test that SwapUint32 returns a BusError when reaching a page that - // signals SIGBUS. - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := SwapUint32(unsafe.Pointer(secondPage), 1) - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func TestSwapUint64SegvError(t *testing.T) { - // Test that SwapUint64 returns a SegvError when reaching a page that - // signals SIGSEGV. - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := SwapUint64(unsafe.Pointer(secondPage), 1) - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func TestSwapUint64BusError(t *testing.T) { - // Test that SwapUint64 returns a BusError when reaching a page that - // signals SIGBUS. - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := SwapUint64(unsafe.Pointer(secondPage), 1) - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func TestCompareAndSwapUint32SegvError(t *testing.T) { - // Test that CompareAndSwapUint32 returns a SegvError when reaching a page - // that signals SIGSEGV. - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := CompareAndSwapUint32(unsafe.Pointer(secondPage), 0, 1) - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func TestCompareAndSwapUint32BusError(t *testing.T) { - // Test that CompareAndSwapUint32 returns a BusError when reaching a page - // that signals SIGBUS. - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := CompareAndSwapUint32(unsafe.Pointer(secondPage), 0, 1) - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func testCopy(dst, src []byte) (panicked bool) { - defer func() { - if r := recover(); r != nil { - panicked = true - } - }() - debug.SetPanicOnFault(true) - copy(dst, src) - return -} - -func TestSegVOnMemmove(t *testing.T) { - // Test that SIGSEGVs received by runtime.memmove when *not* doing - // CopyIn or CopyOut work gets propagated to the runtime. - const bufLen = pageSize - a, err := syscall.Mmap(-1, 0, bufLen, syscall.PROT_NONE, syscall.MAP_ANON|syscall.MAP_PRIVATE) - if err != nil { - t.Fatalf("Mmap failed: %v", err) - - } - defer syscall.Munmap(a) - b := randBuf(bufLen) - - if !testCopy(b, a) { - t.Fatalf("testCopy didn't panic when it should have") - } - - if !testCopy(a, b) { - t.Fatalf("testCopy didn't panic when it should have") - } -} - -func TestSigbusOnMemmove(t *testing.T) { - // Test that SIGBUS received by runtime.memmove when *not* doing - // CopyIn or CopyOut work gets propagated to the runtime. - const bufLen = pageSize - f, err := ioutil.TempFile("", "sigbus_test") - if err != nil { - t.Fatalf("TempFile failed: %v", err) - } - os.Remove(f.Name()) - defer f.Close() - - a, err := syscall.Mmap(int(f.Fd()), 0, bufLen, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) - if err != nil { - t.Fatalf("Mmap failed: %v", err) - - } - defer syscall.Munmap(a) - b := randBuf(bufLen) - - if !testCopy(b, a) { - t.Fatalf("testCopy didn't panic when it should have") - } - - if !testCopy(a, b) { - t.Fatalf("testCopy didn't panic when it should have") - } -} diff --git a/pkg/sentry/safemem/BUILD b/pkg/sentry/safemem/BUILD deleted file mode 100644 index fd6dc8e6e..000000000 --- a/pkg/sentry/safemem/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "safemem", - srcs = [ - "block_unsafe.go", - "io.go", - "safemem.go", - "seq_unsafe.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/safemem", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/sentry/platform/safecopy", - ], -) - -go_test( - name = "safemem_test", - size = "small", - srcs = [ - "io_test.go", - "seq_test.go", - ], - embed = [":safemem"], -) diff --git a/pkg/sentry/safemem/io_test.go b/pkg/sentry/safemem/io_test.go deleted file mode 100644 index 629741bee..000000000 --- a/pkg/sentry/safemem/io_test.go +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package safemem - -import ( - "bytes" - "io" - "testing" -) - -func makeBlocks(slices ...[]byte) []Block { - blocks := make([]Block, 0, len(slices)) - for _, s := range slices { - blocks = append(blocks, BlockFromSafeSlice(s)) - } - return blocks -} - -func TestFromIOReaderFullRead(t *testing.T) { - r := FromIOReader{bytes.NewBufferString("foobar")} - dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) - n, err := r.ReadToBlocks(BlockSeqFromSlice(dsts)) - if wantN := uint64(6); n != wantN || err != nil { - t.Errorf("ReadToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - for i, want := range [][]byte{[]byte("foo"), []byte("bar")} { - if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { - t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) - } - } -} - -type eofHidingReader struct { - Reader io.Reader -} - -func (r eofHidingReader) Read(dst []byte) (int, error) { - n, err := r.Reader.Read(dst) - if err == io.EOF { - return n, nil - } - return n, err -} - -func TestFromIOReaderPartialRead(t *testing.T) { - r := FromIOReader{eofHidingReader{bytes.NewBufferString("foob")}} - dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) - n, err := r.ReadToBlocks(BlockSeqFromSlice(dsts)) - // FromIOReader should stop after the eofHidingReader returns (1, nil) - // for a 3-byte read. - if wantN := uint64(4); n != wantN || err != nil { - t.Errorf("ReadToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - for i, want := range [][]byte{[]byte("foo"), []byte("b\x00\x00")} { - if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { - t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) - } - } -} - -type singleByteReader struct { - Reader io.Reader -} - -func (r singleByteReader) Read(dst []byte) (int, error) { - if len(dst) == 0 { - return r.Reader.Read(dst) - } - return r.Reader.Read(dst[:1]) -} - -func TestSingleByteReader(t *testing.T) { - r := FromIOReader{singleByteReader{bytes.NewBufferString("foobar")}} - dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) - n, err := r.ReadToBlocks(BlockSeqFromSlice(dsts)) - // FromIOReader should stop after the singleByteReader returns (1, nil) - // for a 3-byte read. - if wantN := uint64(1); n != wantN || err != nil { - t.Errorf("ReadToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - for i, want := range [][]byte{[]byte("f\x00\x00"), []byte("\x00\x00\x00")} { - if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { - t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) - } - } -} - -func TestReadFullToBlocks(t *testing.T) { - r := FromIOReader{singleByteReader{bytes.NewBufferString("foobar")}} - dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) - n, err := ReadFullToBlocks(r, BlockSeqFromSlice(dsts)) - // ReadFullToBlocks should call into FromIOReader => singleByteReader - // repeatedly until dsts is exhausted. - if wantN := uint64(6); n != wantN || err != nil { - t.Errorf("ReadFullToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - for i, want := range [][]byte{[]byte("foo"), []byte("bar")} { - if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { - t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) - } - } -} - -func TestFromIOWriterFullWrite(t *testing.T) { - srcs := makeBlocks([]byte("foo"), []byte("bar")) - var dst bytes.Buffer - w := FromIOWriter{&dst} - n, err := w.WriteFromBlocks(BlockSeqFromSlice(srcs)) - if wantN := uint64(6); n != wantN || err != nil { - t.Errorf("WriteFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst.Bytes(), []byte("foobar"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} - -type limitedWriter struct { - Writer io.Writer - Done int - Limit int -} - -func (w *limitedWriter) Write(src []byte) (int, error) { - count := len(src) - if count > (w.Limit - w.Done) { - count = w.Limit - w.Done - } - n, err := w.Writer.Write(src[:count]) - w.Done += n - return n, err -} - -func TestFromIOWriterPartialWrite(t *testing.T) { - srcs := makeBlocks([]byte("foo"), []byte("bar")) - var dst bytes.Buffer - w := FromIOWriter{&limitedWriter{&dst, 0, 4}} - n, err := w.WriteFromBlocks(BlockSeqFromSlice(srcs)) - // FromIOWriter should stop after the limitedWriter returns (1, nil) for a - // 3-byte write. - if wantN := uint64(4); n != wantN || err != nil { - t.Errorf("WriteFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst.Bytes(), []byte("foob"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} - -type singleByteWriter struct { - Writer io.Writer -} - -func (w singleByteWriter) Write(src []byte) (int, error) { - if len(src) == 0 { - return w.Writer.Write(src) - } - return w.Writer.Write(src[:1]) -} - -func TestSingleByteWriter(t *testing.T) { - srcs := makeBlocks([]byte("foo"), []byte("bar")) - var dst bytes.Buffer - w := FromIOWriter{singleByteWriter{&dst}} - n, err := w.WriteFromBlocks(BlockSeqFromSlice(srcs)) - // FromIOWriter should stop after the singleByteWriter returns (1, nil) - // for a 3-byte write. - if wantN := uint64(1); n != wantN || err != nil { - t.Errorf("WriteFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst.Bytes(), []byte("f"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} - -func TestWriteFullToBlocks(t *testing.T) { - srcs := makeBlocks([]byte("foo"), []byte("bar")) - var dst bytes.Buffer - w := FromIOWriter{singleByteWriter{&dst}} - n, err := WriteFullFromBlocks(w, BlockSeqFromSlice(srcs)) - // WriteFullToBlocks should call into FromIOWriter => singleByteWriter - // repeatedly until srcs is exhausted. - if wantN := uint64(6); n != wantN || err != nil { - t.Errorf("WriteFullFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst.Bytes(), []byte("foobar"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} diff --git a/pkg/sentry/safemem/safemem_state_autogen.go b/pkg/sentry/safemem/safemem_state_autogen.go new file mode 100755 index 000000000..7264df0b1 --- /dev/null +++ b/pkg/sentry/safemem/safemem_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package safemem + diff --git a/pkg/sentry/safemem/seq_test.go b/pkg/sentry/safemem/seq_test.go deleted file mode 100644 index eba4bb535..000000000 --- a/pkg/sentry/safemem/seq_test.go +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package safemem - -import ( - "bytes" - "reflect" - "testing" -) - -type blockSeqTest struct { - desc string - - pieces []string - haveOffset bool - offset uint64 - haveLimit bool - limit uint64 - - want string -} - -func (t blockSeqTest) NonEmptyByteSlices() [][]byte { - // t is a value, so we can mutate it freely. - slices := make([][]byte, 0, len(t.pieces)) - for _, str := range t.pieces { - if t.haveOffset { - strOff := t.offset - if strOff > uint64(len(str)) { - strOff = uint64(len(str)) - } - str = str[strOff:] - t.offset -= strOff - } - if t.haveLimit { - strLim := t.limit - if strLim > uint64(len(str)) { - strLim = uint64(len(str)) - } - str = str[:strLim] - t.limit -= strLim - } - if len(str) != 0 { - slices = append(slices, []byte(str)) - } - } - return slices -} - -func (t blockSeqTest) BlockSeq() BlockSeq { - blocks := make([]Block, 0, len(t.pieces)) - for _, str := range t.pieces { - blocks = append(blocks, BlockFromSafeSlice([]byte(str))) - } - bs := BlockSeqFromSlice(blocks) - if t.haveOffset { - bs = bs.DropFirst64(t.offset) - } - if t.haveLimit { - bs = bs.TakeFirst64(t.limit) - } - return bs -} - -var blockSeqTests = []blockSeqTest{ - { - desc: "Empty sequence", - }, - { - desc: "Sequence of length 1", - pieces: []string{"foobar"}, - want: "foobar", - }, - { - desc: "Sequence of length 2", - pieces: []string{"foo", "bar"}, - want: "foobar", - }, - { - desc: "Empty Blocks", - pieces: []string{"", "foo", "", "", "bar", ""}, - want: "foobar", - }, - { - desc: "Sequence with non-zero offset", - pieces: []string{"foo", "bar"}, - haveOffset: true, - offset: 2, - want: "obar", - }, - { - desc: "Sequence with non-maximal limit", - pieces: []string{"foo", "bar"}, - haveLimit: true, - limit: 5, - want: "fooba", - }, - { - desc: "Sequence with offset and limit", - pieces: []string{"foo", "bar"}, - haveOffset: true, - offset: 2, - haveLimit: true, - limit: 3, - want: "oba", - }, -} - -func TestBlockSeqNumBytes(t *testing.T) { - for _, test := range blockSeqTests { - t.Run(test.desc, func(t *testing.T) { - if got, want := test.BlockSeq().NumBytes(), uint64(len(test.want)); got != want { - t.Errorf("NumBytes: got %d, wanted %d", got, want) - } - }) - } -} - -func TestBlockSeqIterBlocks(t *testing.T) { - // Tests BlockSeq iteration using Head/Tail. - for _, test := range blockSeqTests { - t.Run(test.desc, func(t *testing.T) { - srcs := test.BlockSeq() - // "Note that a non-nil empty slice and a nil slice ... are not - // deeply equal." - reflect - slices := make([][]byte, 0, 0) - for !srcs.IsEmpty() { - src := srcs.Head() - slices = append(slices, src.ToSlice()) - nextSrcs := srcs.Tail() - if got, want := nextSrcs.NumBytes(), srcs.NumBytes()-uint64(src.Len()); got != want { - t.Fatalf("%v.Tail(): got %v (%d bytes), wanted %d bytes", srcs, nextSrcs, got, want) - } - srcs = nextSrcs - } - if wantSlices := test.NonEmptyByteSlices(); !reflect.DeepEqual(slices, wantSlices) { - t.Errorf("Accumulated slices: got %v, wanted %v", slices, wantSlices) - } - }) - } -} - -func TestBlockSeqIterBytes(t *testing.T) { - // Tests BlockSeq iteration using Head/DropFirst. - for _, test := range blockSeqTests { - t.Run(test.desc, func(t *testing.T) { - srcs := test.BlockSeq() - var dst bytes.Buffer - for !srcs.IsEmpty() { - src := srcs.Head() - var b [1]byte - n, err := Copy(BlockFromSafeSlice(b[:]), src) - if n != 1 || err != nil { - t.Fatalf("Copy: got (%v, %v), wanted (1, nil)", n, err) - } - dst.WriteByte(b[0]) - nextSrcs := srcs.DropFirst(1) - if got, want := nextSrcs.NumBytes(), srcs.NumBytes()-1; got != want { - t.Fatalf("%v.DropFirst(1): got %v (%d bytes), wanted %d bytes", srcs, nextSrcs, got, want) - } - srcs = nextSrcs - } - if got := string(dst.Bytes()); got != test.want { - t.Errorf("Copied string: got %q, wanted %q", got, test.want) - } - }) - } -} - -func TestBlockSeqDropBeyondLimit(t *testing.T) { - blocks := []Block{BlockFromSafeSlice([]byte("123")), BlockFromSafeSlice([]byte("4"))} - bs := BlockSeqFromSlice(blocks) - if got, want := bs.NumBytes(), uint64(4); got != want { - t.Errorf("%v.NumBytes(): got %d, wanted %d", bs, got, want) - } - bs = bs.TakeFirst(1) - if got, want := bs.NumBytes(), uint64(1); got != want { - t.Errorf("%v.NumBytes(): got %d, wanted %d", bs, got, want) - } - bs = bs.DropFirst(2) - if got, want := bs.NumBytes(), uint64(0); got != want { - t.Errorf("%v.NumBytes(): got %d, wanted %d", bs, got, want) - } -} diff --git a/pkg/sentry/sighandling/BUILD b/pkg/sentry/sighandling/BUILD deleted file mode 100644 index f561670c7..000000000 --- a/pkg/sentry/sighandling/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "sighandling", - srcs = [ - "sighandling.go", - "sighandling_unsafe.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/sighandling", - visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/abi/linux"], -) diff --git a/pkg/sentry/sighandling/sighandling_state_autogen.go b/pkg/sentry/sighandling/sighandling_state_autogen.go new file mode 100755 index 000000000..dad4bdda2 --- /dev/null +++ b/pkg/sentry/sighandling/sighandling_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package sighandling + diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD deleted file mode 100644 index 2b03ea87c..000000000 --- a/pkg/sentry/socket/BUILD +++ /dev/null @@ -1,23 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "socket", - srcs = ["socket.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/tcpip", - ], -) diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD deleted file mode 100644 index 81dbd7309..000000000 --- a/pkg/sentry/socket/control/BUILD +++ /dev/null @@ -1,24 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "control", - srcs = ["control.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/control", - imports = [ - "gvisor.dev/gvisor/pkg/sentry/fs", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/socket/control/control_state_autogen.go b/pkg/sentry/socket/control/control_state_autogen.go new file mode 100755 index 000000000..c5ecfe700 --- /dev/null +++ b/pkg/sentry/socket/control/control_state_autogen.go @@ -0,0 +1,36 @@ +// automatically generated by stateify. + +package control + +import ( + "gvisor.dev/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/sentry/fs" +) + +func (x *RightsFiles) save(m state.Map) { + m.SaveValue("", ([]*fs.File)(*x)) +} + +func (x *RightsFiles) load(m state.Map) { + m.LoadValue("", new([]*fs.File), func(y interface{}) { *x = (RightsFiles)(y.([]*fs.File)) }) +} + +func (x *scmCredentials) beforeSave() {} +func (x *scmCredentials) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) + m.Save("kuid", &x.kuid) + m.Save("kgid", &x.kgid) +} + +func (x *scmCredentials) afterLoad() {} +func (x *scmCredentials) load(m state.Map) { + m.Load("t", &x.t) + m.Load("kuid", &x.kuid) + m.Load("kgid", &x.kgid) +} + +func init() { + state.Register("control.RightsFiles", (*RightsFiles)(nil), state.Fns{Save: (*RightsFiles).save, Load: (*RightsFiles).load}) + state.Register("control.scmCredentials", (*scmCredentials)(nil), state.Fns{Save: (*scmCredentials).save, Load: (*scmCredentials).load}) +} diff --git a/pkg/sentry/socket/epsocket/BUILD b/pkg/sentry/socket/epsocket/BUILD deleted file mode 100644 index e927821e1..000000000 --- a/pkg/sentry/socket/epsocket/BUILD +++ /dev/null @@ -1,50 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "epsocket", - srcs = [ - "device.go", - "epsocket.go", - "provider.go", - "save_restore.go", - "stack.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/epsocket", - visibility = [ - "//pkg/sentry:internal", - ], - deps = [ - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/log", - "//pkg/metric", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/safemem", - "//pkg/sentry/socket", - "//pkg/sentry/socket/netfilter", - "//pkg/sentry/unimpl", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/tcpip/buffer", - "//pkg/tcpip/header", - "//pkg/tcpip/iptables", - "//pkg/tcpip/network/ipv4", - "//pkg/tcpip/network/ipv6", - "//pkg/tcpip/stack", - "//pkg/tcpip/transport/tcp", - "//pkg/tcpip/transport/udp", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/socket/epsocket/epsocket_state_autogen.go b/pkg/sentry/socket/epsocket/epsocket_state_autogen.go new file mode 100755 index 000000000..c69c145bd --- /dev/null +++ b/pkg/sentry/socket/epsocket/epsocket_state_autogen.go @@ -0,0 +1,56 @@ +// automatically generated by stateify. + +package epsocket + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *SocketOperations) beforeSave() {} +func (x *SocketOperations) save(m state.Map) { + x.beforeSave() + m.Save("SendReceiveTimeout", &x.SendReceiveTimeout) + m.Save("Queue", &x.Queue) + m.Save("family", &x.family) + m.Save("Endpoint", &x.Endpoint) + m.Save("skType", &x.skType) + m.Save("protocol", &x.protocol) + m.Save("readView", &x.readView) + m.Save("readCM", &x.readCM) + m.Save("sender", &x.sender) + m.Save("sockOptTimestamp", &x.sockOptTimestamp) + m.Save("timestampValid", &x.timestampValid) + m.Save("timestampNS", &x.timestampNS) + m.Save("sockOptInq", &x.sockOptInq) +} + +func (x *SocketOperations) afterLoad() {} +func (x *SocketOperations) load(m state.Map) { + m.Load("SendReceiveTimeout", &x.SendReceiveTimeout) + m.Load("Queue", &x.Queue) + m.Load("family", &x.family) + m.Load("Endpoint", &x.Endpoint) + m.Load("skType", &x.skType) + m.Load("protocol", &x.protocol) + m.Load("readView", &x.readView) + m.Load("readCM", &x.readCM) + m.Load("sender", &x.sender) + m.Load("sockOptTimestamp", &x.sockOptTimestamp) + m.Load("timestampValid", &x.timestampValid) + m.Load("timestampNS", &x.timestampNS) + m.Load("sockOptInq", &x.sockOptInq) +} + +func (x *Stack) beforeSave() {} +func (x *Stack) save(m state.Map) { + x.beforeSave() +} + +func (x *Stack) load(m state.Map) { + m.AfterLoad(x.afterLoad) +} + +func init() { + state.Register("epsocket.SocketOperations", (*SocketOperations)(nil), state.Fns{Save: (*SocketOperations).save, Load: (*SocketOperations).load}) + state.Register("epsocket.Stack", (*Stack)(nil), state.Fns{Save: (*Stack).save, Load: (*Stack).load}) +} diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD deleted file mode 100644 index a951f1bb0..000000000 --- a/pkg/sentry/socket/hostinet/BUILD +++ /dev/null @@ -1,37 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "hostinet", - srcs = [ - "device.go", - "hostinet.go", - "save_restore.go", - "socket.go", - "socket_unsafe.go", - "stack.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/hostinet", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/fdnotifier", - "//pkg/log", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sentry/safemem", - "//pkg/sentry/socket", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/socket/hostinet/hostinet_state_autogen.go b/pkg/sentry/socket/hostinet/hostinet_state_autogen.go new file mode 100755 index 000000000..0a5c7cdf3 --- /dev/null +++ b/pkg/sentry/socket/hostinet/hostinet_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package hostinet + diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD deleted file mode 100644 index 3021f83e7..000000000 --- a/pkg/sentry/socket/netfilter/BUILD +++ /dev/null @@ -1,23 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "netfilter", - srcs = [ - "netfilter.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netfilter", - # This target depends on netstack and should only be used by epsocket, - # which is allowed to depend on netstack. - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/kernel", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/tcpip", - "//pkg/tcpip/iptables", - "//pkg/tcpip/stack", - ], -) diff --git a/pkg/sentry/socket/netfilter/netfilter_state_autogen.go b/pkg/sentry/socket/netfilter/netfilter_state_autogen.go new file mode 100755 index 000000000..f3d68dd64 --- /dev/null +++ b/pkg/sentry/socket/netfilter/netfilter_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package netfilter + diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD deleted file mode 100644 index 45ebb2a0e..000000000 --- a/pkg/sentry/socket/netlink/BUILD +++ /dev/null @@ -1,34 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "netlink", - srcs = [ - "message.go", - "provider.go", - "socket.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/netlink/port", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/socket/netlink/netlink_state_autogen.go b/pkg/sentry/socket/netlink/netlink_state_autogen.go new file mode 100755 index 000000000..794187ec0 --- /dev/null +++ b/pkg/sentry/socket/netlink/netlink_state_autogen.go @@ -0,0 +1,38 @@ +// automatically generated by stateify. + +package netlink + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *Socket) beforeSave() {} +func (x *Socket) save(m state.Map) { + x.beforeSave() + m.Save("SendReceiveTimeout", &x.SendReceiveTimeout) + m.Save("ports", &x.ports) + m.Save("protocol", &x.protocol) + m.Save("skType", &x.skType) + m.Save("ep", &x.ep) + m.Save("connection", &x.connection) + m.Save("bound", &x.bound) + m.Save("portID", &x.portID) + m.Save("sendBufferSize", &x.sendBufferSize) +} + +func (x *Socket) afterLoad() {} +func (x *Socket) load(m state.Map) { + m.Load("SendReceiveTimeout", &x.SendReceiveTimeout) + m.Load("ports", &x.ports) + m.Load("protocol", &x.protocol) + m.Load("skType", &x.skType) + m.Load("ep", &x.ep) + m.Load("connection", &x.connection) + m.Load("bound", &x.bound) + m.Load("portID", &x.portID) + m.Load("sendBufferSize", &x.sendBufferSize) +} + +func init() { + state.Register("netlink.Socket", (*Socket)(nil), state.Fns{Save: (*Socket).save, Load: (*Socket).load}) +} diff --git a/pkg/sentry/socket/netlink/port/BUILD b/pkg/sentry/socket/netlink/port/BUILD deleted file mode 100644 index 9e2e12799..000000000 --- a/pkg/sentry/socket/netlink/port/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_library( - name = "port", - srcs = ["port.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port", - visibility = ["//pkg/sentry:internal"], -) - -go_test( - name = "port_test", - srcs = ["port_test.go"], - embed = [":port"], -) diff --git a/pkg/sentry/socket/netlink/port/port_state_autogen.go b/pkg/sentry/socket/netlink/port/port_state_autogen.go new file mode 100755 index 000000000..b4b4a6758 --- /dev/null +++ b/pkg/sentry/socket/netlink/port/port_state_autogen.go @@ -0,0 +1,22 @@ +// automatically generated by stateify. + +package port + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *Manager) beforeSave() {} +func (x *Manager) save(m state.Map) { + x.beforeSave() + m.Save("ports", &x.ports) +} + +func (x *Manager) afterLoad() {} +func (x *Manager) load(m state.Map) { + m.Load("ports", &x.ports) +} + +func init() { + state.Register("port.Manager", (*Manager)(nil), state.Fns{Save: (*Manager).save, Load: (*Manager).load}) +} diff --git a/pkg/sentry/socket/netlink/port/port_test.go b/pkg/sentry/socket/netlink/port/port_test.go deleted file mode 100644 index 516f6cd6c..000000000 --- a/pkg/sentry/socket/netlink/port/port_test.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package port - -import ( - "testing" -) - -func TestAllocateHint(t *testing.T) { - m := New() - - // We can get the hint port. - p, ok := m.Allocate(0, 1) - if !ok { - t.Errorf("m.Allocate got !ok want ok") - } - if p != 1 { - t.Errorf("m.Allocate(0, 1) got %d want 1", p) - } - - // Hint is taken. - p, ok = m.Allocate(0, 1) - if !ok { - t.Errorf("m.Allocate got !ok want ok") - } - if p == 1 { - t.Errorf("m.Allocate(0, 1) got 1 want anything else") - } - - // Hint is available for a different protocol. - p, ok = m.Allocate(1, 1) - if !ok { - t.Errorf("m.Allocate got !ok want ok") - } - if p != 1 { - t.Errorf("m.Allocate(1, 1) got %d want 1", p) - } - - m.Release(0, 1) - - // Hint is available again after release. - p, ok = m.Allocate(0, 1) - if !ok { - t.Errorf("m.Allocate got !ok want ok") - } - if p != 1 { - t.Errorf("m.Allocate(0, 1) got %d want 1", p) - } -} - -func TestAllocateExhausted(t *testing.T) { - m := New() - - // Fill all ports (0 is already reserved). - for i := int32(1); i < maxPorts; i++ { - p, ok := m.Allocate(0, i) - if !ok { - t.Fatalf("m.Allocate got !ok want ok") - } - if p != i { - t.Fatalf("m.Allocate(0, %d) got %d want %d", i, p, i) - } - } - - // Now no more can be allocated. - p, ok := m.Allocate(0, 1) - if ok { - t.Errorf("m.Allocate got %d, ok want !ok", p) - } -} diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD deleted file mode 100644 index 5dc8533ec..000000000 --- a/pkg/sentry/socket/netlink/route/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "route", - srcs = ["protocol.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/socket/netlink", - "//pkg/syserr", - ], -) diff --git a/pkg/sentry/socket/netlink/route/route_state_autogen.go b/pkg/sentry/socket/netlink/route/route_state_autogen.go new file mode 100755 index 000000000..f4225cb3c --- /dev/null +++ b/pkg/sentry/socket/netlink/route/route_state_autogen.go @@ -0,0 +1,20 @@ +// automatically generated by stateify. + +package route + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *Protocol) beforeSave() {} +func (x *Protocol) save(m state.Map) { + x.beforeSave() +} + +func (x *Protocol) afterLoad() {} +func (x *Protocol) load(m state.Map) { +} + +func init() { + state.Register("route.Protocol", (*Protocol)(nil), state.Fns{Save: (*Protocol).save, Load: (*Protocol).load}) +} diff --git a/pkg/sentry/socket/rpcinet/BUILD b/pkg/sentry/socket/rpcinet/BUILD deleted file mode 100644 index 5061dcbde..000000000 --- a/pkg/sentry/socket/rpcinet/BUILD +++ /dev/null @@ -1,59 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") - -package(licenses = ["notice"]) - -go_library( - name = "rpcinet", - srcs = [ - "device.go", - "rpcinet.go", - "socket.go", - "stack.go", - "stack_unsafe.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet", - visibility = ["//pkg/sentry:internal"], - deps = [ - ":syscall_rpc_go_proto", - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/hostinet", - "//pkg/sentry/socket/rpcinet/conn", - "//pkg/sentry/socket/rpcinet/notifier", - "//pkg/sentry/unimpl", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/tcpip/buffer", - "//pkg/unet", - "//pkg/waiter", - ], -) - -proto_library( - name = "syscall_rpc_proto", - srcs = ["syscall_rpc.proto"], - visibility = [ - "//visibility:public", - ], -) - -go_proto_library( - name = "syscall_rpc_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto", - proto = ":syscall_rpc_proto", - visibility = [ - "//visibility:public", - ], -) diff --git a/pkg/sentry/socket/rpcinet/conn/BUILD b/pkg/sentry/socket/rpcinet/conn/BUILD deleted file mode 100644 index 23eadcb1b..000000000 --- a/pkg/sentry/socket/rpcinet/conn/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "conn", - srcs = ["conn.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/binary", - "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", - "//pkg/syserr", - "//pkg/unet", - "@com_github_golang_protobuf//proto:go_default_library", - ], -) diff --git a/pkg/sentry/socket/rpcinet/conn/conn_state_autogen.go b/pkg/sentry/socket/rpcinet/conn/conn_state_autogen.go new file mode 100755 index 000000000..f6c927a60 --- /dev/null +++ b/pkg/sentry/socket/rpcinet/conn/conn_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package conn + diff --git a/pkg/sentry/socket/rpcinet/notifier/BUILD b/pkg/sentry/socket/rpcinet/notifier/BUILD deleted file mode 100644 index a3585e10d..000000000 --- a/pkg/sentry/socket/rpcinet/notifier/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "notifier", - srcs = ["notifier.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier", - visibility = ["//:sandbox"], - deps = [ - "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", - "//pkg/sentry/socket/rpcinet/conn", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/socket/rpcinet/notifier/notifier_state_autogen.go b/pkg/sentry/socket/rpcinet/notifier/notifier_state_autogen.go new file mode 100755 index 000000000..f108d91c1 --- /dev/null +++ b/pkg/sentry/socket/rpcinet/notifier/notifier_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package notifier + diff --git a/pkg/sentry/socket/rpcinet/rpcinet_state_autogen.go b/pkg/sentry/socket/rpcinet/rpcinet_state_autogen.go new file mode 100755 index 000000000..d3076c7e3 --- /dev/null +++ b/pkg/sentry/socket/rpcinet/rpcinet_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package rpcinet + diff --git a/pkg/sentry/socket/rpcinet/syscall_rpc.proto b/pkg/sentry/socket/rpcinet/syscall_rpc.proto deleted file mode 100644 index 9586f5923..000000000 --- a/pkg/sentry/socket/rpcinet/syscall_rpc.proto +++ /dev/null @@ -1,353 +0,0 @@ -syntax = "proto3"; - -// package syscall_rpc is a set of networking related system calls that can be -// forwarded to a socket gofer. -// -// TODO(b/77963526): Document individual RPCs. -package syscall_rpc; - -message SendmsgRequest { - uint32 fd = 1; - bytes data = 2 [ctype = CORD]; - bytes address = 3; - bool more = 4; - bool end_of_record = 5; -} - -message SendmsgResponse { - oneof result { - uint32 error_number = 1; - uint32 length = 2; - } -} - -message IOCtlRequest { - uint32 fd = 1; - uint32 cmd = 2; - bytes arg = 3; -} - -message IOCtlResponse { - oneof result { - uint32 error_number = 1; - bytes value = 2; - } -} - -message RecvmsgRequest { - uint32 fd = 1; - uint32 length = 2; - bool sender = 3; - bool peek = 4; - bool trunc = 5; - uint32 cmsg_length = 6; -} - -message OpenRequest { - bytes path = 1; - uint32 flags = 2; - uint32 mode = 3; -} - -message OpenResponse { - oneof result { - uint32 error_number = 1; - uint32 fd = 2; - } -} - -message ReadRequest { - uint32 fd = 1; - uint32 length = 2; -} - -message ReadResponse { - oneof result { - uint32 error_number = 1; - bytes data = 2 [ctype = CORD]; - } -} - -message ReadFileRequest { - string path = 1; -} - -message ReadFileResponse { - oneof result { - uint32 error_number = 1; - bytes data = 2 [ctype = CORD]; - } -} - -message WriteRequest { - uint32 fd = 1; - bytes data = 2 [ctype = CORD]; -} - -message WriteResponse { - oneof result { - uint32 error_number = 1; - uint32 length = 2; - } -} - -message WriteFileRequest { - string path = 1; - bytes content = 2; -} - -message WriteFileResponse { - uint32 error_number = 1; - uint32 written = 2; -} - -message AddressResponse { - bytes address = 1; - uint32 length = 2; -} - -message RecvmsgResponse { - message ResultPayload { - bytes data = 1 [ctype = CORD]; - AddressResponse address = 2; - uint32 length = 3; - bytes cmsg_data = 4; - } - oneof result { - uint32 error_number = 1; - ResultPayload payload = 2; - } -} - -message BindRequest { - uint32 fd = 1; - bytes address = 2; -} - -message BindResponse { - uint32 error_number = 1; -} - -message AcceptRequest { - uint32 fd = 1; - bool peer = 2; - int64 flags = 3; -} - -message AcceptResponse { - message ResultPayload { - uint32 fd = 1; - AddressResponse address = 2; - } - oneof result { - uint32 error_number = 1; - ResultPayload payload = 2; - } -} - -message ConnectRequest { - uint32 fd = 1; - bytes address = 2; -} - -message ConnectResponse { - uint32 error_number = 1; -} - -message ListenRequest { - uint32 fd = 1; - int64 backlog = 2; -} - -message ListenResponse { - uint32 error_number = 1; -} - -message ShutdownRequest { - uint32 fd = 1; - int64 how = 2; -} - -message ShutdownResponse { - uint32 error_number = 1; -} - -message CloseRequest { - uint32 fd = 1; -} - -message CloseResponse { - uint32 error_number = 1; -} - -message GetSockOptRequest { - uint32 fd = 1; - int64 level = 2; - int64 name = 3; - uint32 length = 4; -} - -message GetSockOptResponse { - oneof result { - uint32 error_number = 1; - bytes opt = 2; - } -} - -message SetSockOptRequest { - uint32 fd = 1; - int64 level = 2; - int64 name = 3; - bytes opt = 4; -} - -message SetSockOptResponse { - uint32 error_number = 1; -} - -message GetSockNameRequest { - uint32 fd = 1; -} - -message GetSockNameResponse { - oneof result { - uint32 error_number = 1; - AddressResponse address = 2; - } -} - -message GetPeerNameRequest { - uint32 fd = 1; -} - -message GetPeerNameResponse { - oneof result { - uint32 error_number = 1; - AddressResponse address = 2; - } -} - -message SocketRequest { - int64 family = 1; - int64 type = 2; - int64 protocol = 3; -} - -message SocketResponse { - oneof result { - uint32 error_number = 1; - uint32 fd = 2; - } -} - -message EpollWaitRequest { - uint32 fd = 1; - uint32 num_events = 2; - sint64 msec = 3; -} - -message EpollEvent { - uint32 fd = 1; - uint32 events = 2; -} - -message EpollEvents { - repeated EpollEvent events = 1; -} - -message EpollWaitResponse { - oneof result { - uint32 error_number = 1; - EpollEvents events = 2; - } -} - -message EpollCtlRequest { - uint32 epfd = 1; - int64 op = 2; - uint32 fd = 3; - EpollEvent event = 4; -} - -message EpollCtlResponse { - uint32 error_number = 1; -} - -message EpollCreate1Request { - int64 flag = 1; -} - -message EpollCreate1Response { - oneof result { - uint32 error_number = 1; - uint32 fd = 2; - } -} - -message PollRequest { - uint32 fd = 1; - uint32 events = 2; -} - -message PollResponse { - oneof result { - uint32 error_number = 1; - uint32 events = 2; - } -} - -message SyscallRequest { - oneof args { - SocketRequest socket = 1; - SendmsgRequest sendmsg = 2; - RecvmsgRequest recvmsg = 3; - BindRequest bind = 4; - AcceptRequest accept = 5; - ConnectRequest connect = 6; - ListenRequest listen = 7; - ShutdownRequest shutdown = 8; - CloseRequest close = 9; - GetSockOptRequest get_sock_opt = 10; - SetSockOptRequest set_sock_opt = 11; - GetSockNameRequest get_sock_name = 12; - GetPeerNameRequest get_peer_name = 13; - EpollWaitRequest epoll_wait = 14; - EpollCtlRequest epoll_ctl = 15; - EpollCreate1Request epoll_create1 = 16; - PollRequest poll = 17; - ReadRequest read = 18; - WriteRequest write = 19; - OpenRequest open = 20; - IOCtlRequest ioctl = 21; - WriteFileRequest write_file = 22; - ReadFileRequest read_file = 23; - } -} - -message SyscallResponse { - oneof result { - SocketResponse socket = 1; - SendmsgResponse sendmsg = 2; - RecvmsgResponse recvmsg = 3; - BindResponse bind = 4; - AcceptResponse accept = 5; - ConnectResponse connect = 6; - ListenResponse listen = 7; - ShutdownResponse shutdown = 8; - CloseResponse close = 9; - GetSockOptResponse get_sock_opt = 10; - SetSockOptResponse set_sock_opt = 11; - GetSockNameResponse get_sock_name = 12; - GetPeerNameResponse get_peer_name = 13; - EpollWaitResponse epoll_wait = 14; - EpollCtlResponse epoll_ctl = 15; - EpollCreate1Response epoll_create1 = 16; - PollResponse poll = 17; - ReadResponse read = 18; - WriteResponse write = 19; - OpenResponse open = 20; - IOCtlResponse ioctl = 21; - WriteFileResponse write_file = 22; - ReadFileResponse read_file = 23; - } -} diff --git a/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto/syscall_rpc.pb.go b/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto/syscall_rpc.pb.go new file mode 100755 index 000000000..fb68d5294 --- /dev/null +++ b/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto/syscall_rpc.pb.go @@ -0,0 +1,3938 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: pkg/sentry/socket/rpcinet/syscall_rpc.proto + +package syscall_rpc + +import ( + fmt "fmt" + proto "github.com/golang/protobuf/proto" + math "math" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package + +type SendmsgRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Data []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` + Address []byte `protobuf:"bytes,3,opt,name=address,proto3" json:"address,omitempty"` + More bool `protobuf:"varint,4,opt,name=more,proto3" json:"more,omitempty"` + EndOfRecord bool `protobuf:"varint,5,opt,name=end_of_record,json=endOfRecord,proto3" json:"end_of_record,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SendmsgRequest) Reset() { *m = SendmsgRequest{} } +func (m *SendmsgRequest) String() string { return proto.CompactTextString(m) } +func (*SendmsgRequest) ProtoMessage() {} +func (*SendmsgRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{0} +} + +func (m *SendmsgRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SendmsgRequest.Unmarshal(m, b) +} +func (m *SendmsgRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SendmsgRequest.Marshal(b, m, deterministic) +} +func (m *SendmsgRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_SendmsgRequest.Merge(m, src) +} +func (m *SendmsgRequest) XXX_Size() int { + return xxx_messageInfo_SendmsgRequest.Size(m) +} +func (m *SendmsgRequest) XXX_DiscardUnknown() { + xxx_messageInfo_SendmsgRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_SendmsgRequest proto.InternalMessageInfo + +func (m *SendmsgRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *SendmsgRequest) GetData() []byte { + if m != nil { + return m.Data + } + return nil +} + +func (m *SendmsgRequest) GetAddress() []byte { + if m != nil { + return m.Address + } + return nil +} + +func (m *SendmsgRequest) GetMore() bool { + if m != nil { + return m.More + } + return false +} + +func (m *SendmsgRequest) GetEndOfRecord() bool { + if m != nil { + return m.EndOfRecord + } + return false +} + +type SendmsgResponse struct { + // Types that are valid to be assigned to Result: + // *SendmsgResponse_ErrorNumber + // *SendmsgResponse_Length + Result isSendmsgResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SendmsgResponse) Reset() { *m = SendmsgResponse{} } +func (m *SendmsgResponse) String() string { return proto.CompactTextString(m) } +func (*SendmsgResponse) ProtoMessage() {} +func (*SendmsgResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{1} +} + +func (m *SendmsgResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SendmsgResponse.Unmarshal(m, b) +} +func (m *SendmsgResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SendmsgResponse.Marshal(b, m, deterministic) +} +func (m *SendmsgResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_SendmsgResponse.Merge(m, src) +} +func (m *SendmsgResponse) XXX_Size() int { + return xxx_messageInfo_SendmsgResponse.Size(m) +} +func (m *SendmsgResponse) XXX_DiscardUnknown() { + xxx_messageInfo_SendmsgResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_SendmsgResponse proto.InternalMessageInfo + +type isSendmsgResponse_Result interface { + isSendmsgResponse_Result() +} + +type SendmsgResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type SendmsgResponse_Length struct { + Length uint32 `protobuf:"varint,2,opt,name=length,proto3,oneof"` +} + +func (*SendmsgResponse_ErrorNumber) isSendmsgResponse_Result() {} + +func (*SendmsgResponse_Length) isSendmsgResponse_Result() {} + +func (m *SendmsgResponse) GetResult() isSendmsgResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *SendmsgResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*SendmsgResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *SendmsgResponse) GetLength() uint32 { + if x, ok := m.GetResult().(*SendmsgResponse_Length); ok { + return x.Length + } + return 0 +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*SendmsgResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*SendmsgResponse_ErrorNumber)(nil), + (*SendmsgResponse_Length)(nil), + } +} + +type IOCtlRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Cmd uint32 `protobuf:"varint,2,opt,name=cmd,proto3" json:"cmd,omitempty"` + Arg []byte `protobuf:"bytes,3,opt,name=arg,proto3" json:"arg,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *IOCtlRequest) Reset() { *m = IOCtlRequest{} } +func (m *IOCtlRequest) String() string { return proto.CompactTextString(m) } +func (*IOCtlRequest) ProtoMessage() {} +func (*IOCtlRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{2} +} + +func (m *IOCtlRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_IOCtlRequest.Unmarshal(m, b) +} +func (m *IOCtlRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_IOCtlRequest.Marshal(b, m, deterministic) +} +func (m *IOCtlRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_IOCtlRequest.Merge(m, src) +} +func (m *IOCtlRequest) XXX_Size() int { + return xxx_messageInfo_IOCtlRequest.Size(m) +} +func (m *IOCtlRequest) XXX_DiscardUnknown() { + xxx_messageInfo_IOCtlRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_IOCtlRequest proto.InternalMessageInfo + +func (m *IOCtlRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *IOCtlRequest) GetCmd() uint32 { + if m != nil { + return m.Cmd + } + return 0 +} + +func (m *IOCtlRequest) GetArg() []byte { + if m != nil { + return m.Arg + } + return nil +} + +type IOCtlResponse struct { + // Types that are valid to be assigned to Result: + // *IOCtlResponse_ErrorNumber + // *IOCtlResponse_Value + Result isIOCtlResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *IOCtlResponse) Reset() { *m = IOCtlResponse{} } +func (m *IOCtlResponse) String() string { return proto.CompactTextString(m) } +func (*IOCtlResponse) ProtoMessage() {} +func (*IOCtlResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{3} +} + +func (m *IOCtlResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_IOCtlResponse.Unmarshal(m, b) +} +func (m *IOCtlResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_IOCtlResponse.Marshal(b, m, deterministic) +} +func (m *IOCtlResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_IOCtlResponse.Merge(m, src) +} +func (m *IOCtlResponse) XXX_Size() int { + return xxx_messageInfo_IOCtlResponse.Size(m) +} +func (m *IOCtlResponse) XXX_DiscardUnknown() { + xxx_messageInfo_IOCtlResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_IOCtlResponse proto.InternalMessageInfo + +type isIOCtlResponse_Result interface { + isIOCtlResponse_Result() +} + +type IOCtlResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type IOCtlResponse_Value struct { + Value []byte `protobuf:"bytes,2,opt,name=value,proto3,oneof"` +} + +func (*IOCtlResponse_ErrorNumber) isIOCtlResponse_Result() {} + +func (*IOCtlResponse_Value) isIOCtlResponse_Result() {} + +func (m *IOCtlResponse) GetResult() isIOCtlResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *IOCtlResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*IOCtlResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *IOCtlResponse) GetValue() []byte { + if x, ok := m.GetResult().(*IOCtlResponse_Value); ok { + return x.Value + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*IOCtlResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*IOCtlResponse_ErrorNumber)(nil), + (*IOCtlResponse_Value)(nil), + } +} + +type RecvmsgRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Length uint32 `protobuf:"varint,2,opt,name=length,proto3" json:"length,omitempty"` + Sender bool `protobuf:"varint,3,opt,name=sender,proto3" json:"sender,omitempty"` + Peek bool `protobuf:"varint,4,opt,name=peek,proto3" json:"peek,omitempty"` + Trunc bool `protobuf:"varint,5,opt,name=trunc,proto3" json:"trunc,omitempty"` + CmsgLength uint32 `protobuf:"varint,6,opt,name=cmsg_length,json=cmsgLength,proto3" json:"cmsg_length,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *RecvmsgRequest) Reset() { *m = RecvmsgRequest{} } +func (m *RecvmsgRequest) String() string { return proto.CompactTextString(m) } +func (*RecvmsgRequest) ProtoMessage() {} +func (*RecvmsgRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{4} +} + +func (m *RecvmsgRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_RecvmsgRequest.Unmarshal(m, b) +} +func (m *RecvmsgRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_RecvmsgRequest.Marshal(b, m, deterministic) +} +func (m *RecvmsgRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_RecvmsgRequest.Merge(m, src) +} +func (m *RecvmsgRequest) XXX_Size() int { + return xxx_messageInfo_RecvmsgRequest.Size(m) +} +func (m *RecvmsgRequest) XXX_DiscardUnknown() { + xxx_messageInfo_RecvmsgRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_RecvmsgRequest proto.InternalMessageInfo + +func (m *RecvmsgRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *RecvmsgRequest) GetLength() uint32 { + if m != nil { + return m.Length + } + return 0 +} + +func (m *RecvmsgRequest) GetSender() bool { + if m != nil { + return m.Sender + } + return false +} + +func (m *RecvmsgRequest) GetPeek() bool { + if m != nil { + return m.Peek + } + return false +} + +func (m *RecvmsgRequest) GetTrunc() bool { + if m != nil { + return m.Trunc + } + return false +} + +func (m *RecvmsgRequest) GetCmsgLength() uint32 { + if m != nil { + return m.CmsgLength + } + return 0 +} + +type OpenRequest struct { + Path []byte `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"` + Flags uint32 `protobuf:"varint,2,opt,name=flags,proto3" json:"flags,omitempty"` + Mode uint32 `protobuf:"varint,3,opt,name=mode,proto3" json:"mode,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *OpenRequest) Reset() { *m = OpenRequest{} } +func (m *OpenRequest) String() string { return proto.CompactTextString(m) } +func (*OpenRequest) ProtoMessage() {} +func (*OpenRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{5} +} + +func (m *OpenRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_OpenRequest.Unmarshal(m, b) +} +func (m *OpenRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_OpenRequest.Marshal(b, m, deterministic) +} +func (m *OpenRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_OpenRequest.Merge(m, src) +} +func (m *OpenRequest) XXX_Size() int { + return xxx_messageInfo_OpenRequest.Size(m) +} +func (m *OpenRequest) XXX_DiscardUnknown() { + xxx_messageInfo_OpenRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_OpenRequest proto.InternalMessageInfo + +func (m *OpenRequest) GetPath() []byte { + if m != nil { + return m.Path + } + return nil +} + +func (m *OpenRequest) GetFlags() uint32 { + if m != nil { + return m.Flags + } + return 0 +} + +func (m *OpenRequest) GetMode() uint32 { + if m != nil { + return m.Mode + } + return 0 +} + +type OpenResponse struct { + // Types that are valid to be assigned to Result: + // *OpenResponse_ErrorNumber + // *OpenResponse_Fd + Result isOpenResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *OpenResponse) Reset() { *m = OpenResponse{} } +func (m *OpenResponse) String() string { return proto.CompactTextString(m) } +func (*OpenResponse) ProtoMessage() {} +func (*OpenResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{6} +} + +func (m *OpenResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_OpenResponse.Unmarshal(m, b) +} +func (m *OpenResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_OpenResponse.Marshal(b, m, deterministic) +} +func (m *OpenResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_OpenResponse.Merge(m, src) +} +func (m *OpenResponse) XXX_Size() int { + return xxx_messageInfo_OpenResponse.Size(m) +} +func (m *OpenResponse) XXX_DiscardUnknown() { + xxx_messageInfo_OpenResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_OpenResponse proto.InternalMessageInfo + +type isOpenResponse_Result interface { + isOpenResponse_Result() +} + +type OpenResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type OpenResponse_Fd struct { + Fd uint32 `protobuf:"varint,2,opt,name=fd,proto3,oneof"` +} + +func (*OpenResponse_ErrorNumber) isOpenResponse_Result() {} + +func (*OpenResponse_Fd) isOpenResponse_Result() {} + +func (m *OpenResponse) GetResult() isOpenResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *OpenResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*OpenResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *OpenResponse) GetFd() uint32 { + if x, ok := m.GetResult().(*OpenResponse_Fd); ok { + return x.Fd + } + return 0 +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*OpenResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*OpenResponse_ErrorNumber)(nil), + (*OpenResponse_Fd)(nil), + } +} + +type ReadRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Length uint32 `protobuf:"varint,2,opt,name=length,proto3" json:"length,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ReadRequest) Reset() { *m = ReadRequest{} } +func (m *ReadRequest) String() string { return proto.CompactTextString(m) } +func (*ReadRequest) ProtoMessage() {} +func (*ReadRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{7} +} + +func (m *ReadRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ReadRequest.Unmarshal(m, b) +} +func (m *ReadRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ReadRequest.Marshal(b, m, deterministic) +} +func (m *ReadRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ReadRequest.Merge(m, src) +} +func (m *ReadRequest) XXX_Size() int { + return xxx_messageInfo_ReadRequest.Size(m) +} +func (m *ReadRequest) XXX_DiscardUnknown() { + xxx_messageInfo_ReadRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ReadRequest proto.InternalMessageInfo + +func (m *ReadRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *ReadRequest) GetLength() uint32 { + if m != nil { + return m.Length + } + return 0 +} + +type ReadResponse struct { + // Types that are valid to be assigned to Result: + // *ReadResponse_ErrorNumber + // *ReadResponse_Data + Result isReadResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ReadResponse) Reset() { *m = ReadResponse{} } +func (m *ReadResponse) String() string { return proto.CompactTextString(m) } +func (*ReadResponse) ProtoMessage() {} +func (*ReadResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{8} +} + +func (m *ReadResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ReadResponse.Unmarshal(m, b) +} +func (m *ReadResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ReadResponse.Marshal(b, m, deterministic) +} +func (m *ReadResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ReadResponse.Merge(m, src) +} +func (m *ReadResponse) XXX_Size() int { + return xxx_messageInfo_ReadResponse.Size(m) +} +func (m *ReadResponse) XXX_DiscardUnknown() { + xxx_messageInfo_ReadResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ReadResponse proto.InternalMessageInfo + +type isReadResponse_Result interface { + isReadResponse_Result() +} + +type ReadResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type ReadResponse_Data struct { + Data []byte `protobuf:"bytes,2,opt,name=data,proto3,oneof"` +} + +func (*ReadResponse_ErrorNumber) isReadResponse_Result() {} + +func (*ReadResponse_Data) isReadResponse_Result() {} + +func (m *ReadResponse) GetResult() isReadResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *ReadResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*ReadResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *ReadResponse) GetData() []byte { + if x, ok := m.GetResult().(*ReadResponse_Data); ok { + return x.Data + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*ReadResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*ReadResponse_ErrorNumber)(nil), + (*ReadResponse_Data)(nil), + } +} + +type ReadFileRequest struct { + Path string `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ReadFileRequest) Reset() { *m = ReadFileRequest{} } +func (m *ReadFileRequest) String() string { return proto.CompactTextString(m) } +func (*ReadFileRequest) ProtoMessage() {} +func (*ReadFileRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{9} +} + +func (m *ReadFileRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ReadFileRequest.Unmarshal(m, b) +} +func (m *ReadFileRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ReadFileRequest.Marshal(b, m, deterministic) +} +func (m *ReadFileRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ReadFileRequest.Merge(m, src) +} +func (m *ReadFileRequest) XXX_Size() int { + return xxx_messageInfo_ReadFileRequest.Size(m) +} +func (m *ReadFileRequest) XXX_DiscardUnknown() { + xxx_messageInfo_ReadFileRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ReadFileRequest proto.InternalMessageInfo + +func (m *ReadFileRequest) GetPath() string { + if m != nil { + return m.Path + } + return "" +} + +type ReadFileResponse struct { + // Types that are valid to be assigned to Result: + // *ReadFileResponse_ErrorNumber + // *ReadFileResponse_Data + Result isReadFileResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ReadFileResponse) Reset() { *m = ReadFileResponse{} } +func (m *ReadFileResponse) String() string { return proto.CompactTextString(m) } +func (*ReadFileResponse) ProtoMessage() {} +func (*ReadFileResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{10} +} + +func (m *ReadFileResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ReadFileResponse.Unmarshal(m, b) +} +func (m *ReadFileResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ReadFileResponse.Marshal(b, m, deterministic) +} +func (m *ReadFileResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ReadFileResponse.Merge(m, src) +} +func (m *ReadFileResponse) XXX_Size() int { + return xxx_messageInfo_ReadFileResponse.Size(m) +} +func (m *ReadFileResponse) XXX_DiscardUnknown() { + xxx_messageInfo_ReadFileResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ReadFileResponse proto.InternalMessageInfo + +type isReadFileResponse_Result interface { + isReadFileResponse_Result() +} + +type ReadFileResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type ReadFileResponse_Data struct { + Data []byte `protobuf:"bytes,2,opt,name=data,proto3,oneof"` +} + +func (*ReadFileResponse_ErrorNumber) isReadFileResponse_Result() {} + +func (*ReadFileResponse_Data) isReadFileResponse_Result() {} + +func (m *ReadFileResponse) GetResult() isReadFileResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *ReadFileResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*ReadFileResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *ReadFileResponse) GetData() []byte { + if x, ok := m.GetResult().(*ReadFileResponse_Data); ok { + return x.Data + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*ReadFileResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*ReadFileResponse_ErrorNumber)(nil), + (*ReadFileResponse_Data)(nil), + } +} + +type WriteRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Data []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *WriteRequest) Reset() { *m = WriteRequest{} } +func (m *WriteRequest) String() string { return proto.CompactTextString(m) } +func (*WriteRequest) ProtoMessage() {} +func (*WriteRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{11} +} + +func (m *WriteRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_WriteRequest.Unmarshal(m, b) +} +func (m *WriteRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_WriteRequest.Marshal(b, m, deterministic) +} +func (m *WriteRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_WriteRequest.Merge(m, src) +} +func (m *WriteRequest) XXX_Size() int { + return xxx_messageInfo_WriteRequest.Size(m) +} +func (m *WriteRequest) XXX_DiscardUnknown() { + xxx_messageInfo_WriteRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_WriteRequest proto.InternalMessageInfo + +func (m *WriteRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *WriteRequest) GetData() []byte { + if m != nil { + return m.Data + } + return nil +} + +type WriteResponse struct { + // Types that are valid to be assigned to Result: + // *WriteResponse_ErrorNumber + // *WriteResponse_Length + Result isWriteResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *WriteResponse) Reset() { *m = WriteResponse{} } +func (m *WriteResponse) String() string { return proto.CompactTextString(m) } +func (*WriteResponse) ProtoMessage() {} +func (*WriteResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{12} +} + +func (m *WriteResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_WriteResponse.Unmarshal(m, b) +} +func (m *WriteResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_WriteResponse.Marshal(b, m, deterministic) +} +func (m *WriteResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_WriteResponse.Merge(m, src) +} +func (m *WriteResponse) XXX_Size() int { + return xxx_messageInfo_WriteResponse.Size(m) +} +func (m *WriteResponse) XXX_DiscardUnknown() { + xxx_messageInfo_WriteResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_WriteResponse proto.InternalMessageInfo + +type isWriteResponse_Result interface { + isWriteResponse_Result() +} + +type WriteResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type WriteResponse_Length struct { + Length uint32 `protobuf:"varint,2,opt,name=length,proto3,oneof"` +} + +func (*WriteResponse_ErrorNumber) isWriteResponse_Result() {} + +func (*WriteResponse_Length) isWriteResponse_Result() {} + +func (m *WriteResponse) GetResult() isWriteResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *WriteResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*WriteResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *WriteResponse) GetLength() uint32 { + if x, ok := m.GetResult().(*WriteResponse_Length); ok { + return x.Length + } + return 0 +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*WriteResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*WriteResponse_ErrorNumber)(nil), + (*WriteResponse_Length)(nil), + } +} + +type WriteFileRequest struct { + Path string `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"` + Content []byte `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *WriteFileRequest) Reset() { *m = WriteFileRequest{} } +func (m *WriteFileRequest) String() string { return proto.CompactTextString(m) } +func (*WriteFileRequest) ProtoMessage() {} +func (*WriteFileRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{13} +} + +func (m *WriteFileRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_WriteFileRequest.Unmarshal(m, b) +} +func (m *WriteFileRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_WriteFileRequest.Marshal(b, m, deterministic) +} +func (m *WriteFileRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_WriteFileRequest.Merge(m, src) +} +func (m *WriteFileRequest) XXX_Size() int { + return xxx_messageInfo_WriteFileRequest.Size(m) +} +func (m *WriteFileRequest) XXX_DiscardUnknown() { + xxx_messageInfo_WriteFileRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_WriteFileRequest proto.InternalMessageInfo + +func (m *WriteFileRequest) GetPath() string { + if m != nil { + return m.Path + } + return "" +} + +func (m *WriteFileRequest) GetContent() []byte { + if m != nil { + return m.Content + } + return nil +} + +type WriteFileResponse struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3" json:"error_number,omitempty"` + Written uint32 `protobuf:"varint,2,opt,name=written,proto3" json:"written,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *WriteFileResponse) Reset() { *m = WriteFileResponse{} } +func (m *WriteFileResponse) String() string { return proto.CompactTextString(m) } +func (*WriteFileResponse) ProtoMessage() {} +func (*WriteFileResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{14} +} + +func (m *WriteFileResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_WriteFileResponse.Unmarshal(m, b) +} +func (m *WriteFileResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_WriteFileResponse.Marshal(b, m, deterministic) +} +func (m *WriteFileResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_WriteFileResponse.Merge(m, src) +} +func (m *WriteFileResponse) XXX_Size() int { + return xxx_messageInfo_WriteFileResponse.Size(m) +} +func (m *WriteFileResponse) XXX_DiscardUnknown() { + xxx_messageInfo_WriteFileResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_WriteFileResponse proto.InternalMessageInfo + +func (m *WriteFileResponse) GetErrorNumber() uint32 { + if m != nil { + return m.ErrorNumber + } + return 0 +} + +func (m *WriteFileResponse) GetWritten() uint32 { + if m != nil { + return m.Written + } + return 0 +} + +type AddressResponse struct { + Address []byte `protobuf:"bytes,1,opt,name=address,proto3" json:"address,omitempty"` + Length uint32 `protobuf:"varint,2,opt,name=length,proto3" json:"length,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *AddressResponse) Reset() { *m = AddressResponse{} } +func (m *AddressResponse) String() string { return proto.CompactTextString(m) } +func (*AddressResponse) ProtoMessage() {} +func (*AddressResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{15} +} + +func (m *AddressResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_AddressResponse.Unmarshal(m, b) +} +func (m *AddressResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_AddressResponse.Marshal(b, m, deterministic) +} +func (m *AddressResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_AddressResponse.Merge(m, src) +} +func (m *AddressResponse) XXX_Size() int { + return xxx_messageInfo_AddressResponse.Size(m) +} +func (m *AddressResponse) XXX_DiscardUnknown() { + xxx_messageInfo_AddressResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_AddressResponse proto.InternalMessageInfo + +func (m *AddressResponse) GetAddress() []byte { + if m != nil { + return m.Address + } + return nil +} + +func (m *AddressResponse) GetLength() uint32 { + if m != nil { + return m.Length + } + return 0 +} + +type RecvmsgResponse struct { + // Types that are valid to be assigned to Result: + // *RecvmsgResponse_ErrorNumber + // *RecvmsgResponse_Payload + Result isRecvmsgResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *RecvmsgResponse) Reset() { *m = RecvmsgResponse{} } +func (m *RecvmsgResponse) String() string { return proto.CompactTextString(m) } +func (*RecvmsgResponse) ProtoMessage() {} +func (*RecvmsgResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{16} +} + +func (m *RecvmsgResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_RecvmsgResponse.Unmarshal(m, b) +} +func (m *RecvmsgResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_RecvmsgResponse.Marshal(b, m, deterministic) +} +func (m *RecvmsgResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_RecvmsgResponse.Merge(m, src) +} +func (m *RecvmsgResponse) XXX_Size() int { + return xxx_messageInfo_RecvmsgResponse.Size(m) +} +func (m *RecvmsgResponse) XXX_DiscardUnknown() { + xxx_messageInfo_RecvmsgResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_RecvmsgResponse proto.InternalMessageInfo + +type isRecvmsgResponse_Result interface { + isRecvmsgResponse_Result() +} + +type RecvmsgResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type RecvmsgResponse_Payload struct { + Payload *RecvmsgResponse_ResultPayload `protobuf:"bytes,2,opt,name=payload,proto3,oneof"` +} + +func (*RecvmsgResponse_ErrorNumber) isRecvmsgResponse_Result() {} + +func (*RecvmsgResponse_Payload) isRecvmsgResponse_Result() {} + +func (m *RecvmsgResponse) GetResult() isRecvmsgResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *RecvmsgResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*RecvmsgResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *RecvmsgResponse) GetPayload() *RecvmsgResponse_ResultPayload { + if x, ok := m.GetResult().(*RecvmsgResponse_Payload); ok { + return x.Payload + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*RecvmsgResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*RecvmsgResponse_ErrorNumber)(nil), + (*RecvmsgResponse_Payload)(nil), + } +} + +type RecvmsgResponse_ResultPayload struct { + Data []byte `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` + Address *AddressResponse `protobuf:"bytes,2,opt,name=address,proto3" json:"address,omitempty"` + Length uint32 `protobuf:"varint,3,opt,name=length,proto3" json:"length,omitempty"` + CmsgData []byte `protobuf:"bytes,4,opt,name=cmsg_data,json=cmsgData,proto3" json:"cmsg_data,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *RecvmsgResponse_ResultPayload) Reset() { *m = RecvmsgResponse_ResultPayload{} } +func (m *RecvmsgResponse_ResultPayload) String() string { return proto.CompactTextString(m) } +func (*RecvmsgResponse_ResultPayload) ProtoMessage() {} +func (*RecvmsgResponse_ResultPayload) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{16, 0} +} + +func (m *RecvmsgResponse_ResultPayload) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_RecvmsgResponse_ResultPayload.Unmarshal(m, b) +} +func (m *RecvmsgResponse_ResultPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_RecvmsgResponse_ResultPayload.Marshal(b, m, deterministic) +} +func (m *RecvmsgResponse_ResultPayload) XXX_Merge(src proto.Message) { + xxx_messageInfo_RecvmsgResponse_ResultPayload.Merge(m, src) +} +func (m *RecvmsgResponse_ResultPayload) XXX_Size() int { + return xxx_messageInfo_RecvmsgResponse_ResultPayload.Size(m) +} +func (m *RecvmsgResponse_ResultPayload) XXX_DiscardUnknown() { + xxx_messageInfo_RecvmsgResponse_ResultPayload.DiscardUnknown(m) +} + +var xxx_messageInfo_RecvmsgResponse_ResultPayload proto.InternalMessageInfo + +func (m *RecvmsgResponse_ResultPayload) GetData() []byte { + if m != nil { + return m.Data + } + return nil +} + +func (m *RecvmsgResponse_ResultPayload) GetAddress() *AddressResponse { + if m != nil { + return m.Address + } + return nil +} + +func (m *RecvmsgResponse_ResultPayload) GetLength() uint32 { + if m != nil { + return m.Length + } + return 0 +} + +func (m *RecvmsgResponse_ResultPayload) GetCmsgData() []byte { + if m != nil { + return m.CmsgData + } + return nil +} + +type BindRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Address []byte `protobuf:"bytes,2,opt,name=address,proto3" json:"address,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *BindRequest) Reset() { *m = BindRequest{} } +func (m *BindRequest) String() string { return proto.CompactTextString(m) } +func (*BindRequest) ProtoMessage() {} +func (*BindRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{17} +} + +func (m *BindRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_BindRequest.Unmarshal(m, b) +} +func (m *BindRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_BindRequest.Marshal(b, m, deterministic) +} +func (m *BindRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_BindRequest.Merge(m, src) +} +func (m *BindRequest) XXX_Size() int { + return xxx_messageInfo_BindRequest.Size(m) +} +func (m *BindRequest) XXX_DiscardUnknown() { + xxx_messageInfo_BindRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_BindRequest proto.InternalMessageInfo + +func (m *BindRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *BindRequest) GetAddress() []byte { + if m != nil { + return m.Address + } + return nil +} + +type BindResponse struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3" json:"error_number,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *BindResponse) Reset() { *m = BindResponse{} } +func (m *BindResponse) String() string { return proto.CompactTextString(m) } +func (*BindResponse) ProtoMessage() {} +func (*BindResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{18} +} + +func (m *BindResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_BindResponse.Unmarshal(m, b) +} +func (m *BindResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_BindResponse.Marshal(b, m, deterministic) +} +func (m *BindResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_BindResponse.Merge(m, src) +} +func (m *BindResponse) XXX_Size() int { + return xxx_messageInfo_BindResponse.Size(m) +} +func (m *BindResponse) XXX_DiscardUnknown() { + xxx_messageInfo_BindResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_BindResponse proto.InternalMessageInfo + +func (m *BindResponse) GetErrorNumber() uint32 { + if m != nil { + return m.ErrorNumber + } + return 0 +} + +type AcceptRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Peer bool `protobuf:"varint,2,opt,name=peer,proto3" json:"peer,omitempty"` + Flags int64 `protobuf:"varint,3,opt,name=flags,proto3" json:"flags,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *AcceptRequest) Reset() { *m = AcceptRequest{} } +func (m *AcceptRequest) String() string { return proto.CompactTextString(m) } +func (*AcceptRequest) ProtoMessage() {} +func (*AcceptRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{19} +} + +func (m *AcceptRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_AcceptRequest.Unmarshal(m, b) +} +func (m *AcceptRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_AcceptRequest.Marshal(b, m, deterministic) +} +func (m *AcceptRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_AcceptRequest.Merge(m, src) +} +func (m *AcceptRequest) XXX_Size() int { + return xxx_messageInfo_AcceptRequest.Size(m) +} +func (m *AcceptRequest) XXX_DiscardUnknown() { + xxx_messageInfo_AcceptRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_AcceptRequest proto.InternalMessageInfo + +func (m *AcceptRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *AcceptRequest) GetPeer() bool { + if m != nil { + return m.Peer + } + return false +} + +func (m *AcceptRequest) GetFlags() int64 { + if m != nil { + return m.Flags + } + return 0 +} + +type AcceptResponse struct { + // Types that are valid to be assigned to Result: + // *AcceptResponse_ErrorNumber + // *AcceptResponse_Payload + Result isAcceptResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *AcceptResponse) Reset() { *m = AcceptResponse{} } +func (m *AcceptResponse) String() string { return proto.CompactTextString(m) } +func (*AcceptResponse) ProtoMessage() {} +func (*AcceptResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{20} +} + +func (m *AcceptResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_AcceptResponse.Unmarshal(m, b) +} +func (m *AcceptResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_AcceptResponse.Marshal(b, m, deterministic) +} +func (m *AcceptResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_AcceptResponse.Merge(m, src) +} +func (m *AcceptResponse) XXX_Size() int { + return xxx_messageInfo_AcceptResponse.Size(m) +} +func (m *AcceptResponse) XXX_DiscardUnknown() { + xxx_messageInfo_AcceptResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_AcceptResponse proto.InternalMessageInfo + +type isAcceptResponse_Result interface { + isAcceptResponse_Result() +} + +type AcceptResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type AcceptResponse_Payload struct { + Payload *AcceptResponse_ResultPayload `protobuf:"bytes,2,opt,name=payload,proto3,oneof"` +} + +func (*AcceptResponse_ErrorNumber) isAcceptResponse_Result() {} + +func (*AcceptResponse_Payload) isAcceptResponse_Result() {} + +func (m *AcceptResponse) GetResult() isAcceptResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *AcceptResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*AcceptResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *AcceptResponse) GetPayload() *AcceptResponse_ResultPayload { + if x, ok := m.GetResult().(*AcceptResponse_Payload); ok { + return x.Payload + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*AcceptResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*AcceptResponse_ErrorNumber)(nil), + (*AcceptResponse_Payload)(nil), + } +} + +type AcceptResponse_ResultPayload struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Address *AddressResponse `protobuf:"bytes,2,opt,name=address,proto3" json:"address,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *AcceptResponse_ResultPayload) Reset() { *m = AcceptResponse_ResultPayload{} } +func (m *AcceptResponse_ResultPayload) String() string { return proto.CompactTextString(m) } +func (*AcceptResponse_ResultPayload) ProtoMessage() {} +func (*AcceptResponse_ResultPayload) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{20, 0} +} + +func (m *AcceptResponse_ResultPayload) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_AcceptResponse_ResultPayload.Unmarshal(m, b) +} +func (m *AcceptResponse_ResultPayload) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_AcceptResponse_ResultPayload.Marshal(b, m, deterministic) +} +func (m *AcceptResponse_ResultPayload) XXX_Merge(src proto.Message) { + xxx_messageInfo_AcceptResponse_ResultPayload.Merge(m, src) +} +func (m *AcceptResponse_ResultPayload) XXX_Size() int { + return xxx_messageInfo_AcceptResponse_ResultPayload.Size(m) +} +func (m *AcceptResponse_ResultPayload) XXX_DiscardUnknown() { + xxx_messageInfo_AcceptResponse_ResultPayload.DiscardUnknown(m) +} + +var xxx_messageInfo_AcceptResponse_ResultPayload proto.InternalMessageInfo + +func (m *AcceptResponse_ResultPayload) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *AcceptResponse_ResultPayload) GetAddress() *AddressResponse { + if m != nil { + return m.Address + } + return nil +} + +type ConnectRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Address []byte `protobuf:"bytes,2,opt,name=address,proto3" json:"address,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ConnectRequest) Reset() { *m = ConnectRequest{} } +func (m *ConnectRequest) String() string { return proto.CompactTextString(m) } +func (*ConnectRequest) ProtoMessage() {} +func (*ConnectRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{21} +} + +func (m *ConnectRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ConnectRequest.Unmarshal(m, b) +} +func (m *ConnectRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ConnectRequest.Marshal(b, m, deterministic) +} +func (m *ConnectRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ConnectRequest.Merge(m, src) +} +func (m *ConnectRequest) XXX_Size() int { + return xxx_messageInfo_ConnectRequest.Size(m) +} +func (m *ConnectRequest) XXX_DiscardUnknown() { + xxx_messageInfo_ConnectRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ConnectRequest proto.InternalMessageInfo + +func (m *ConnectRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *ConnectRequest) GetAddress() []byte { + if m != nil { + return m.Address + } + return nil +} + +type ConnectResponse struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3" json:"error_number,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ConnectResponse) Reset() { *m = ConnectResponse{} } +func (m *ConnectResponse) String() string { return proto.CompactTextString(m) } +func (*ConnectResponse) ProtoMessage() {} +func (*ConnectResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{22} +} + +func (m *ConnectResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ConnectResponse.Unmarshal(m, b) +} +func (m *ConnectResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ConnectResponse.Marshal(b, m, deterministic) +} +func (m *ConnectResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ConnectResponse.Merge(m, src) +} +func (m *ConnectResponse) XXX_Size() int { + return xxx_messageInfo_ConnectResponse.Size(m) +} +func (m *ConnectResponse) XXX_DiscardUnknown() { + xxx_messageInfo_ConnectResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ConnectResponse proto.InternalMessageInfo + +func (m *ConnectResponse) GetErrorNumber() uint32 { + if m != nil { + return m.ErrorNumber + } + return 0 +} + +type ListenRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Backlog int64 `protobuf:"varint,2,opt,name=backlog,proto3" json:"backlog,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ListenRequest) Reset() { *m = ListenRequest{} } +func (m *ListenRequest) String() string { return proto.CompactTextString(m) } +func (*ListenRequest) ProtoMessage() {} +func (*ListenRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{23} +} + +func (m *ListenRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ListenRequest.Unmarshal(m, b) +} +func (m *ListenRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ListenRequest.Marshal(b, m, deterministic) +} +func (m *ListenRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ListenRequest.Merge(m, src) +} +func (m *ListenRequest) XXX_Size() int { + return xxx_messageInfo_ListenRequest.Size(m) +} +func (m *ListenRequest) XXX_DiscardUnknown() { + xxx_messageInfo_ListenRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ListenRequest proto.InternalMessageInfo + +func (m *ListenRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *ListenRequest) GetBacklog() int64 { + if m != nil { + return m.Backlog + } + return 0 +} + +type ListenResponse struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3" json:"error_number,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ListenResponse) Reset() { *m = ListenResponse{} } +func (m *ListenResponse) String() string { return proto.CompactTextString(m) } +func (*ListenResponse) ProtoMessage() {} +func (*ListenResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{24} +} + +func (m *ListenResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ListenResponse.Unmarshal(m, b) +} +func (m *ListenResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ListenResponse.Marshal(b, m, deterministic) +} +func (m *ListenResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ListenResponse.Merge(m, src) +} +func (m *ListenResponse) XXX_Size() int { + return xxx_messageInfo_ListenResponse.Size(m) +} +func (m *ListenResponse) XXX_DiscardUnknown() { + xxx_messageInfo_ListenResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ListenResponse proto.InternalMessageInfo + +func (m *ListenResponse) GetErrorNumber() uint32 { + if m != nil { + return m.ErrorNumber + } + return 0 +} + +type ShutdownRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + How int64 `protobuf:"varint,2,opt,name=how,proto3" json:"how,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ShutdownRequest) Reset() { *m = ShutdownRequest{} } +func (m *ShutdownRequest) String() string { return proto.CompactTextString(m) } +func (*ShutdownRequest) ProtoMessage() {} +func (*ShutdownRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{25} +} + +func (m *ShutdownRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ShutdownRequest.Unmarshal(m, b) +} +func (m *ShutdownRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ShutdownRequest.Marshal(b, m, deterministic) +} +func (m *ShutdownRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ShutdownRequest.Merge(m, src) +} +func (m *ShutdownRequest) XXX_Size() int { + return xxx_messageInfo_ShutdownRequest.Size(m) +} +func (m *ShutdownRequest) XXX_DiscardUnknown() { + xxx_messageInfo_ShutdownRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ShutdownRequest proto.InternalMessageInfo + +func (m *ShutdownRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *ShutdownRequest) GetHow() int64 { + if m != nil { + return m.How + } + return 0 +} + +type ShutdownResponse struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3" json:"error_number,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ShutdownResponse) Reset() { *m = ShutdownResponse{} } +func (m *ShutdownResponse) String() string { return proto.CompactTextString(m) } +func (*ShutdownResponse) ProtoMessage() {} +func (*ShutdownResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{26} +} + +func (m *ShutdownResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ShutdownResponse.Unmarshal(m, b) +} +func (m *ShutdownResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ShutdownResponse.Marshal(b, m, deterministic) +} +func (m *ShutdownResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ShutdownResponse.Merge(m, src) +} +func (m *ShutdownResponse) XXX_Size() int { + return xxx_messageInfo_ShutdownResponse.Size(m) +} +func (m *ShutdownResponse) XXX_DiscardUnknown() { + xxx_messageInfo_ShutdownResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ShutdownResponse proto.InternalMessageInfo + +func (m *ShutdownResponse) GetErrorNumber() uint32 { + if m != nil { + return m.ErrorNumber + } + return 0 +} + +type CloseRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *CloseRequest) Reset() { *m = CloseRequest{} } +func (m *CloseRequest) String() string { return proto.CompactTextString(m) } +func (*CloseRequest) ProtoMessage() {} +func (*CloseRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{27} +} + +func (m *CloseRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_CloseRequest.Unmarshal(m, b) +} +func (m *CloseRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_CloseRequest.Marshal(b, m, deterministic) +} +func (m *CloseRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_CloseRequest.Merge(m, src) +} +func (m *CloseRequest) XXX_Size() int { + return xxx_messageInfo_CloseRequest.Size(m) +} +func (m *CloseRequest) XXX_DiscardUnknown() { + xxx_messageInfo_CloseRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_CloseRequest proto.InternalMessageInfo + +func (m *CloseRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +type CloseResponse struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3" json:"error_number,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *CloseResponse) Reset() { *m = CloseResponse{} } +func (m *CloseResponse) String() string { return proto.CompactTextString(m) } +func (*CloseResponse) ProtoMessage() {} +func (*CloseResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{28} +} + +func (m *CloseResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_CloseResponse.Unmarshal(m, b) +} +func (m *CloseResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_CloseResponse.Marshal(b, m, deterministic) +} +func (m *CloseResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_CloseResponse.Merge(m, src) +} +func (m *CloseResponse) XXX_Size() int { + return xxx_messageInfo_CloseResponse.Size(m) +} +func (m *CloseResponse) XXX_DiscardUnknown() { + xxx_messageInfo_CloseResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_CloseResponse proto.InternalMessageInfo + +func (m *CloseResponse) GetErrorNumber() uint32 { + if m != nil { + return m.ErrorNumber + } + return 0 +} + +type GetSockOptRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Level int64 `protobuf:"varint,2,opt,name=level,proto3" json:"level,omitempty"` + Name int64 `protobuf:"varint,3,opt,name=name,proto3" json:"name,omitempty"` + Length uint32 `protobuf:"varint,4,opt,name=length,proto3" json:"length,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetSockOptRequest) Reset() { *m = GetSockOptRequest{} } +func (m *GetSockOptRequest) String() string { return proto.CompactTextString(m) } +func (*GetSockOptRequest) ProtoMessage() {} +func (*GetSockOptRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{29} +} + +func (m *GetSockOptRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetSockOptRequest.Unmarshal(m, b) +} +func (m *GetSockOptRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetSockOptRequest.Marshal(b, m, deterministic) +} +func (m *GetSockOptRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetSockOptRequest.Merge(m, src) +} +func (m *GetSockOptRequest) XXX_Size() int { + return xxx_messageInfo_GetSockOptRequest.Size(m) +} +func (m *GetSockOptRequest) XXX_DiscardUnknown() { + xxx_messageInfo_GetSockOptRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_GetSockOptRequest proto.InternalMessageInfo + +func (m *GetSockOptRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *GetSockOptRequest) GetLevel() int64 { + if m != nil { + return m.Level + } + return 0 +} + +func (m *GetSockOptRequest) GetName() int64 { + if m != nil { + return m.Name + } + return 0 +} + +func (m *GetSockOptRequest) GetLength() uint32 { + if m != nil { + return m.Length + } + return 0 +} + +type GetSockOptResponse struct { + // Types that are valid to be assigned to Result: + // *GetSockOptResponse_ErrorNumber + // *GetSockOptResponse_Opt + Result isGetSockOptResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetSockOptResponse) Reset() { *m = GetSockOptResponse{} } +func (m *GetSockOptResponse) String() string { return proto.CompactTextString(m) } +func (*GetSockOptResponse) ProtoMessage() {} +func (*GetSockOptResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{30} +} + +func (m *GetSockOptResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetSockOptResponse.Unmarshal(m, b) +} +func (m *GetSockOptResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetSockOptResponse.Marshal(b, m, deterministic) +} +func (m *GetSockOptResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetSockOptResponse.Merge(m, src) +} +func (m *GetSockOptResponse) XXX_Size() int { + return xxx_messageInfo_GetSockOptResponse.Size(m) +} +func (m *GetSockOptResponse) XXX_DiscardUnknown() { + xxx_messageInfo_GetSockOptResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_GetSockOptResponse proto.InternalMessageInfo + +type isGetSockOptResponse_Result interface { + isGetSockOptResponse_Result() +} + +type GetSockOptResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type GetSockOptResponse_Opt struct { + Opt []byte `protobuf:"bytes,2,opt,name=opt,proto3,oneof"` +} + +func (*GetSockOptResponse_ErrorNumber) isGetSockOptResponse_Result() {} + +func (*GetSockOptResponse_Opt) isGetSockOptResponse_Result() {} + +func (m *GetSockOptResponse) GetResult() isGetSockOptResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *GetSockOptResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*GetSockOptResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *GetSockOptResponse) GetOpt() []byte { + if x, ok := m.GetResult().(*GetSockOptResponse_Opt); ok { + return x.Opt + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*GetSockOptResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*GetSockOptResponse_ErrorNumber)(nil), + (*GetSockOptResponse_Opt)(nil), + } +} + +type SetSockOptRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Level int64 `protobuf:"varint,2,opt,name=level,proto3" json:"level,omitempty"` + Name int64 `protobuf:"varint,3,opt,name=name,proto3" json:"name,omitempty"` + Opt []byte `protobuf:"bytes,4,opt,name=opt,proto3" json:"opt,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SetSockOptRequest) Reset() { *m = SetSockOptRequest{} } +func (m *SetSockOptRequest) String() string { return proto.CompactTextString(m) } +func (*SetSockOptRequest) ProtoMessage() {} +func (*SetSockOptRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{31} +} + +func (m *SetSockOptRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SetSockOptRequest.Unmarshal(m, b) +} +func (m *SetSockOptRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SetSockOptRequest.Marshal(b, m, deterministic) +} +func (m *SetSockOptRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_SetSockOptRequest.Merge(m, src) +} +func (m *SetSockOptRequest) XXX_Size() int { + return xxx_messageInfo_SetSockOptRequest.Size(m) +} +func (m *SetSockOptRequest) XXX_DiscardUnknown() { + xxx_messageInfo_SetSockOptRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_SetSockOptRequest proto.InternalMessageInfo + +func (m *SetSockOptRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *SetSockOptRequest) GetLevel() int64 { + if m != nil { + return m.Level + } + return 0 +} + +func (m *SetSockOptRequest) GetName() int64 { + if m != nil { + return m.Name + } + return 0 +} + +func (m *SetSockOptRequest) GetOpt() []byte { + if m != nil { + return m.Opt + } + return nil +} + +type SetSockOptResponse struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3" json:"error_number,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SetSockOptResponse) Reset() { *m = SetSockOptResponse{} } +func (m *SetSockOptResponse) String() string { return proto.CompactTextString(m) } +func (*SetSockOptResponse) ProtoMessage() {} +func (*SetSockOptResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{32} +} + +func (m *SetSockOptResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SetSockOptResponse.Unmarshal(m, b) +} +func (m *SetSockOptResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SetSockOptResponse.Marshal(b, m, deterministic) +} +func (m *SetSockOptResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_SetSockOptResponse.Merge(m, src) +} +func (m *SetSockOptResponse) XXX_Size() int { + return xxx_messageInfo_SetSockOptResponse.Size(m) +} +func (m *SetSockOptResponse) XXX_DiscardUnknown() { + xxx_messageInfo_SetSockOptResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_SetSockOptResponse proto.InternalMessageInfo + +func (m *SetSockOptResponse) GetErrorNumber() uint32 { + if m != nil { + return m.ErrorNumber + } + return 0 +} + +type GetSockNameRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetSockNameRequest) Reset() { *m = GetSockNameRequest{} } +func (m *GetSockNameRequest) String() string { return proto.CompactTextString(m) } +func (*GetSockNameRequest) ProtoMessage() {} +func (*GetSockNameRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{33} +} + +func (m *GetSockNameRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetSockNameRequest.Unmarshal(m, b) +} +func (m *GetSockNameRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetSockNameRequest.Marshal(b, m, deterministic) +} +func (m *GetSockNameRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetSockNameRequest.Merge(m, src) +} +func (m *GetSockNameRequest) XXX_Size() int { + return xxx_messageInfo_GetSockNameRequest.Size(m) +} +func (m *GetSockNameRequest) XXX_DiscardUnknown() { + xxx_messageInfo_GetSockNameRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_GetSockNameRequest proto.InternalMessageInfo + +func (m *GetSockNameRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +type GetSockNameResponse struct { + // Types that are valid to be assigned to Result: + // *GetSockNameResponse_ErrorNumber + // *GetSockNameResponse_Address + Result isGetSockNameResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetSockNameResponse) Reset() { *m = GetSockNameResponse{} } +func (m *GetSockNameResponse) String() string { return proto.CompactTextString(m) } +func (*GetSockNameResponse) ProtoMessage() {} +func (*GetSockNameResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{34} +} + +func (m *GetSockNameResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetSockNameResponse.Unmarshal(m, b) +} +func (m *GetSockNameResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetSockNameResponse.Marshal(b, m, deterministic) +} +func (m *GetSockNameResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetSockNameResponse.Merge(m, src) +} +func (m *GetSockNameResponse) XXX_Size() int { + return xxx_messageInfo_GetSockNameResponse.Size(m) +} +func (m *GetSockNameResponse) XXX_DiscardUnknown() { + xxx_messageInfo_GetSockNameResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_GetSockNameResponse proto.InternalMessageInfo + +type isGetSockNameResponse_Result interface { + isGetSockNameResponse_Result() +} + +type GetSockNameResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type GetSockNameResponse_Address struct { + Address *AddressResponse `protobuf:"bytes,2,opt,name=address,proto3,oneof"` +} + +func (*GetSockNameResponse_ErrorNumber) isGetSockNameResponse_Result() {} + +func (*GetSockNameResponse_Address) isGetSockNameResponse_Result() {} + +func (m *GetSockNameResponse) GetResult() isGetSockNameResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *GetSockNameResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*GetSockNameResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *GetSockNameResponse) GetAddress() *AddressResponse { + if x, ok := m.GetResult().(*GetSockNameResponse_Address); ok { + return x.Address + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*GetSockNameResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*GetSockNameResponse_ErrorNumber)(nil), + (*GetSockNameResponse_Address)(nil), + } +} + +type GetPeerNameRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetPeerNameRequest) Reset() { *m = GetPeerNameRequest{} } +func (m *GetPeerNameRequest) String() string { return proto.CompactTextString(m) } +func (*GetPeerNameRequest) ProtoMessage() {} +func (*GetPeerNameRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{35} +} + +func (m *GetPeerNameRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetPeerNameRequest.Unmarshal(m, b) +} +func (m *GetPeerNameRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetPeerNameRequest.Marshal(b, m, deterministic) +} +func (m *GetPeerNameRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetPeerNameRequest.Merge(m, src) +} +func (m *GetPeerNameRequest) XXX_Size() int { + return xxx_messageInfo_GetPeerNameRequest.Size(m) +} +func (m *GetPeerNameRequest) XXX_DiscardUnknown() { + xxx_messageInfo_GetPeerNameRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_GetPeerNameRequest proto.InternalMessageInfo + +func (m *GetPeerNameRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +type GetPeerNameResponse struct { + // Types that are valid to be assigned to Result: + // *GetPeerNameResponse_ErrorNumber + // *GetPeerNameResponse_Address + Result isGetPeerNameResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetPeerNameResponse) Reset() { *m = GetPeerNameResponse{} } +func (m *GetPeerNameResponse) String() string { return proto.CompactTextString(m) } +func (*GetPeerNameResponse) ProtoMessage() {} +func (*GetPeerNameResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{36} +} + +func (m *GetPeerNameResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetPeerNameResponse.Unmarshal(m, b) +} +func (m *GetPeerNameResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetPeerNameResponse.Marshal(b, m, deterministic) +} +func (m *GetPeerNameResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetPeerNameResponse.Merge(m, src) +} +func (m *GetPeerNameResponse) XXX_Size() int { + return xxx_messageInfo_GetPeerNameResponse.Size(m) +} +func (m *GetPeerNameResponse) XXX_DiscardUnknown() { + xxx_messageInfo_GetPeerNameResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_GetPeerNameResponse proto.InternalMessageInfo + +type isGetPeerNameResponse_Result interface { + isGetPeerNameResponse_Result() +} + +type GetPeerNameResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type GetPeerNameResponse_Address struct { + Address *AddressResponse `protobuf:"bytes,2,opt,name=address,proto3,oneof"` +} + +func (*GetPeerNameResponse_ErrorNumber) isGetPeerNameResponse_Result() {} + +func (*GetPeerNameResponse_Address) isGetPeerNameResponse_Result() {} + +func (m *GetPeerNameResponse) GetResult() isGetPeerNameResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *GetPeerNameResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*GetPeerNameResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *GetPeerNameResponse) GetAddress() *AddressResponse { + if x, ok := m.GetResult().(*GetPeerNameResponse_Address); ok { + return x.Address + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*GetPeerNameResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*GetPeerNameResponse_ErrorNumber)(nil), + (*GetPeerNameResponse_Address)(nil), + } +} + +type SocketRequest struct { + Family int64 `protobuf:"varint,1,opt,name=family,proto3" json:"family,omitempty"` + Type int64 `protobuf:"varint,2,opt,name=type,proto3" json:"type,omitempty"` + Protocol int64 `protobuf:"varint,3,opt,name=protocol,proto3" json:"protocol,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SocketRequest) Reset() { *m = SocketRequest{} } +func (m *SocketRequest) String() string { return proto.CompactTextString(m) } +func (*SocketRequest) ProtoMessage() {} +func (*SocketRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{37} +} + +func (m *SocketRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SocketRequest.Unmarshal(m, b) +} +func (m *SocketRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SocketRequest.Marshal(b, m, deterministic) +} +func (m *SocketRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_SocketRequest.Merge(m, src) +} +func (m *SocketRequest) XXX_Size() int { + return xxx_messageInfo_SocketRequest.Size(m) +} +func (m *SocketRequest) XXX_DiscardUnknown() { + xxx_messageInfo_SocketRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_SocketRequest proto.InternalMessageInfo + +func (m *SocketRequest) GetFamily() int64 { + if m != nil { + return m.Family + } + return 0 +} + +func (m *SocketRequest) GetType() int64 { + if m != nil { + return m.Type + } + return 0 +} + +func (m *SocketRequest) GetProtocol() int64 { + if m != nil { + return m.Protocol + } + return 0 +} + +type SocketResponse struct { + // Types that are valid to be assigned to Result: + // *SocketResponse_ErrorNumber + // *SocketResponse_Fd + Result isSocketResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SocketResponse) Reset() { *m = SocketResponse{} } +func (m *SocketResponse) String() string { return proto.CompactTextString(m) } +func (*SocketResponse) ProtoMessage() {} +func (*SocketResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{38} +} + +func (m *SocketResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SocketResponse.Unmarshal(m, b) +} +func (m *SocketResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SocketResponse.Marshal(b, m, deterministic) +} +func (m *SocketResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_SocketResponse.Merge(m, src) +} +func (m *SocketResponse) XXX_Size() int { + return xxx_messageInfo_SocketResponse.Size(m) +} +func (m *SocketResponse) XXX_DiscardUnknown() { + xxx_messageInfo_SocketResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_SocketResponse proto.InternalMessageInfo + +type isSocketResponse_Result interface { + isSocketResponse_Result() +} + +type SocketResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type SocketResponse_Fd struct { + Fd uint32 `protobuf:"varint,2,opt,name=fd,proto3,oneof"` +} + +func (*SocketResponse_ErrorNumber) isSocketResponse_Result() {} + +func (*SocketResponse_Fd) isSocketResponse_Result() {} + +func (m *SocketResponse) GetResult() isSocketResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *SocketResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*SocketResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *SocketResponse) GetFd() uint32 { + if x, ok := m.GetResult().(*SocketResponse_Fd); ok { + return x.Fd + } + return 0 +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*SocketResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*SocketResponse_ErrorNumber)(nil), + (*SocketResponse_Fd)(nil), + } +} + +type EpollWaitRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + NumEvents uint32 `protobuf:"varint,2,opt,name=num_events,json=numEvents,proto3" json:"num_events,omitempty"` + Msec int64 `protobuf:"zigzag64,3,opt,name=msec,proto3" json:"msec,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *EpollWaitRequest) Reset() { *m = EpollWaitRequest{} } +func (m *EpollWaitRequest) String() string { return proto.CompactTextString(m) } +func (*EpollWaitRequest) ProtoMessage() {} +func (*EpollWaitRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{39} +} + +func (m *EpollWaitRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_EpollWaitRequest.Unmarshal(m, b) +} +func (m *EpollWaitRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_EpollWaitRequest.Marshal(b, m, deterministic) +} +func (m *EpollWaitRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_EpollWaitRequest.Merge(m, src) +} +func (m *EpollWaitRequest) XXX_Size() int { + return xxx_messageInfo_EpollWaitRequest.Size(m) +} +func (m *EpollWaitRequest) XXX_DiscardUnknown() { + xxx_messageInfo_EpollWaitRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_EpollWaitRequest proto.InternalMessageInfo + +func (m *EpollWaitRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *EpollWaitRequest) GetNumEvents() uint32 { + if m != nil { + return m.NumEvents + } + return 0 +} + +func (m *EpollWaitRequest) GetMsec() int64 { + if m != nil { + return m.Msec + } + return 0 +} + +type EpollEvent struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Events uint32 `protobuf:"varint,2,opt,name=events,proto3" json:"events,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *EpollEvent) Reset() { *m = EpollEvent{} } +func (m *EpollEvent) String() string { return proto.CompactTextString(m) } +func (*EpollEvent) ProtoMessage() {} +func (*EpollEvent) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{40} +} + +func (m *EpollEvent) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_EpollEvent.Unmarshal(m, b) +} +func (m *EpollEvent) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_EpollEvent.Marshal(b, m, deterministic) +} +func (m *EpollEvent) XXX_Merge(src proto.Message) { + xxx_messageInfo_EpollEvent.Merge(m, src) +} +func (m *EpollEvent) XXX_Size() int { + return xxx_messageInfo_EpollEvent.Size(m) +} +func (m *EpollEvent) XXX_DiscardUnknown() { + xxx_messageInfo_EpollEvent.DiscardUnknown(m) +} + +var xxx_messageInfo_EpollEvent proto.InternalMessageInfo + +func (m *EpollEvent) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *EpollEvent) GetEvents() uint32 { + if m != nil { + return m.Events + } + return 0 +} + +type EpollEvents struct { + Events []*EpollEvent `protobuf:"bytes,1,rep,name=events,proto3" json:"events,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *EpollEvents) Reset() { *m = EpollEvents{} } +func (m *EpollEvents) String() string { return proto.CompactTextString(m) } +func (*EpollEvents) ProtoMessage() {} +func (*EpollEvents) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{41} +} + +func (m *EpollEvents) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_EpollEvents.Unmarshal(m, b) +} +func (m *EpollEvents) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_EpollEvents.Marshal(b, m, deterministic) +} +func (m *EpollEvents) XXX_Merge(src proto.Message) { + xxx_messageInfo_EpollEvents.Merge(m, src) +} +func (m *EpollEvents) XXX_Size() int { + return xxx_messageInfo_EpollEvents.Size(m) +} +func (m *EpollEvents) XXX_DiscardUnknown() { + xxx_messageInfo_EpollEvents.DiscardUnknown(m) +} + +var xxx_messageInfo_EpollEvents proto.InternalMessageInfo + +func (m *EpollEvents) GetEvents() []*EpollEvent { + if m != nil { + return m.Events + } + return nil +} + +type EpollWaitResponse struct { + // Types that are valid to be assigned to Result: + // *EpollWaitResponse_ErrorNumber + // *EpollWaitResponse_Events + Result isEpollWaitResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *EpollWaitResponse) Reset() { *m = EpollWaitResponse{} } +func (m *EpollWaitResponse) String() string { return proto.CompactTextString(m) } +func (*EpollWaitResponse) ProtoMessage() {} +func (*EpollWaitResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{42} +} + +func (m *EpollWaitResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_EpollWaitResponse.Unmarshal(m, b) +} +func (m *EpollWaitResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_EpollWaitResponse.Marshal(b, m, deterministic) +} +func (m *EpollWaitResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_EpollWaitResponse.Merge(m, src) +} +func (m *EpollWaitResponse) XXX_Size() int { + return xxx_messageInfo_EpollWaitResponse.Size(m) +} +func (m *EpollWaitResponse) XXX_DiscardUnknown() { + xxx_messageInfo_EpollWaitResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_EpollWaitResponse proto.InternalMessageInfo + +type isEpollWaitResponse_Result interface { + isEpollWaitResponse_Result() +} + +type EpollWaitResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type EpollWaitResponse_Events struct { + Events *EpollEvents `protobuf:"bytes,2,opt,name=events,proto3,oneof"` +} + +func (*EpollWaitResponse_ErrorNumber) isEpollWaitResponse_Result() {} + +func (*EpollWaitResponse_Events) isEpollWaitResponse_Result() {} + +func (m *EpollWaitResponse) GetResult() isEpollWaitResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *EpollWaitResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*EpollWaitResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *EpollWaitResponse) GetEvents() *EpollEvents { + if x, ok := m.GetResult().(*EpollWaitResponse_Events); ok { + return x.Events + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*EpollWaitResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*EpollWaitResponse_ErrorNumber)(nil), + (*EpollWaitResponse_Events)(nil), + } +} + +type EpollCtlRequest struct { + Epfd uint32 `protobuf:"varint,1,opt,name=epfd,proto3" json:"epfd,omitempty"` + Op int64 `protobuf:"varint,2,opt,name=op,proto3" json:"op,omitempty"` + Fd uint32 `protobuf:"varint,3,opt,name=fd,proto3" json:"fd,omitempty"` + Event *EpollEvent `protobuf:"bytes,4,opt,name=event,proto3" json:"event,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *EpollCtlRequest) Reset() { *m = EpollCtlRequest{} } +func (m *EpollCtlRequest) String() string { return proto.CompactTextString(m) } +func (*EpollCtlRequest) ProtoMessage() {} +func (*EpollCtlRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{43} +} + +func (m *EpollCtlRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_EpollCtlRequest.Unmarshal(m, b) +} +func (m *EpollCtlRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_EpollCtlRequest.Marshal(b, m, deterministic) +} +func (m *EpollCtlRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_EpollCtlRequest.Merge(m, src) +} +func (m *EpollCtlRequest) XXX_Size() int { + return xxx_messageInfo_EpollCtlRequest.Size(m) +} +func (m *EpollCtlRequest) XXX_DiscardUnknown() { + xxx_messageInfo_EpollCtlRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_EpollCtlRequest proto.InternalMessageInfo + +func (m *EpollCtlRequest) GetEpfd() uint32 { + if m != nil { + return m.Epfd + } + return 0 +} + +func (m *EpollCtlRequest) GetOp() int64 { + if m != nil { + return m.Op + } + return 0 +} + +func (m *EpollCtlRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *EpollCtlRequest) GetEvent() *EpollEvent { + if m != nil { + return m.Event + } + return nil +} + +type EpollCtlResponse struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3" json:"error_number,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *EpollCtlResponse) Reset() { *m = EpollCtlResponse{} } +func (m *EpollCtlResponse) String() string { return proto.CompactTextString(m) } +func (*EpollCtlResponse) ProtoMessage() {} +func (*EpollCtlResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{44} +} + +func (m *EpollCtlResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_EpollCtlResponse.Unmarshal(m, b) +} +func (m *EpollCtlResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_EpollCtlResponse.Marshal(b, m, deterministic) +} +func (m *EpollCtlResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_EpollCtlResponse.Merge(m, src) +} +func (m *EpollCtlResponse) XXX_Size() int { + return xxx_messageInfo_EpollCtlResponse.Size(m) +} +func (m *EpollCtlResponse) XXX_DiscardUnknown() { + xxx_messageInfo_EpollCtlResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_EpollCtlResponse proto.InternalMessageInfo + +func (m *EpollCtlResponse) GetErrorNumber() uint32 { + if m != nil { + return m.ErrorNumber + } + return 0 +} + +type EpollCreate1Request struct { + Flag int64 `protobuf:"varint,1,opt,name=flag,proto3" json:"flag,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *EpollCreate1Request) Reset() { *m = EpollCreate1Request{} } +func (m *EpollCreate1Request) String() string { return proto.CompactTextString(m) } +func (*EpollCreate1Request) ProtoMessage() {} +func (*EpollCreate1Request) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{45} +} + +func (m *EpollCreate1Request) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_EpollCreate1Request.Unmarshal(m, b) +} +func (m *EpollCreate1Request) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_EpollCreate1Request.Marshal(b, m, deterministic) +} +func (m *EpollCreate1Request) XXX_Merge(src proto.Message) { + xxx_messageInfo_EpollCreate1Request.Merge(m, src) +} +func (m *EpollCreate1Request) XXX_Size() int { + return xxx_messageInfo_EpollCreate1Request.Size(m) +} +func (m *EpollCreate1Request) XXX_DiscardUnknown() { + xxx_messageInfo_EpollCreate1Request.DiscardUnknown(m) +} + +var xxx_messageInfo_EpollCreate1Request proto.InternalMessageInfo + +func (m *EpollCreate1Request) GetFlag() int64 { + if m != nil { + return m.Flag + } + return 0 +} + +type EpollCreate1Response struct { + // Types that are valid to be assigned to Result: + // *EpollCreate1Response_ErrorNumber + // *EpollCreate1Response_Fd + Result isEpollCreate1Response_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *EpollCreate1Response) Reset() { *m = EpollCreate1Response{} } +func (m *EpollCreate1Response) String() string { return proto.CompactTextString(m) } +func (*EpollCreate1Response) ProtoMessage() {} +func (*EpollCreate1Response) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{46} +} + +func (m *EpollCreate1Response) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_EpollCreate1Response.Unmarshal(m, b) +} +func (m *EpollCreate1Response) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_EpollCreate1Response.Marshal(b, m, deterministic) +} +func (m *EpollCreate1Response) XXX_Merge(src proto.Message) { + xxx_messageInfo_EpollCreate1Response.Merge(m, src) +} +func (m *EpollCreate1Response) XXX_Size() int { + return xxx_messageInfo_EpollCreate1Response.Size(m) +} +func (m *EpollCreate1Response) XXX_DiscardUnknown() { + xxx_messageInfo_EpollCreate1Response.DiscardUnknown(m) +} + +var xxx_messageInfo_EpollCreate1Response proto.InternalMessageInfo + +type isEpollCreate1Response_Result interface { + isEpollCreate1Response_Result() +} + +type EpollCreate1Response_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type EpollCreate1Response_Fd struct { + Fd uint32 `protobuf:"varint,2,opt,name=fd,proto3,oneof"` +} + +func (*EpollCreate1Response_ErrorNumber) isEpollCreate1Response_Result() {} + +func (*EpollCreate1Response_Fd) isEpollCreate1Response_Result() {} + +func (m *EpollCreate1Response) GetResult() isEpollCreate1Response_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *EpollCreate1Response) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*EpollCreate1Response_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *EpollCreate1Response) GetFd() uint32 { + if x, ok := m.GetResult().(*EpollCreate1Response_Fd); ok { + return x.Fd + } + return 0 +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*EpollCreate1Response) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*EpollCreate1Response_ErrorNumber)(nil), + (*EpollCreate1Response_Fd)(nil), + } +} + +type PollRequest struct { + Fd uint32 `protobuf:"varint,1,opt,name=fd,proto3" json:"fd,omitempty"` + Events uint32 `protobuf:"varint,2,opt,name=events,proto3" json:"events,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PollRequest) Reset() { *m = PollRequest{} } +func (m *PollRequest) String() string { return proto.CompactTextString(m) } +func (*PollRequest) ProtoMessage() {} +func (*PollRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{47} +} + +func (m *PollRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PollRequest.Unmarshal(m, b) +} +func (m *PollRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PollRequest.Marshal(b, m, deterministic) +} +func (m *PollRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_PollRequest.Merge(m, src) +} +func (m *PollRequest) XXX_Size() int { + return xxx_messageInfo_PollRequest.Size(m) +} +func (m *PollRequest) XXX_DiscardUnknown() { + xxx_messageInfo_PollRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_PollRequest proto.InternalMessageInfo + +func (m *PollRequest) GetFd() uint32 { + if m != nil { + return m.Fd + } + return 0 +} + +func (m *PollRequest) GetEvents() uint32 { + if m != nil { + return m.Events + } + return 0 +} + +type PollResponse struct { + // Types that are valid to be assigned to Result: + // *PollResponse_ErrorNumber + // *PollResponse_Events + Result isPollResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PollResponse) Reset() { *m = PollResponse{} } +func (m *PollResponse) String() string { return proto.CompactTextString(m) } +func (*PollResponse) ProtoMessage() {} +func (*PollResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{48} +} + +func (m *PollResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PollResponse.Unmarshal(m, b) +} +func (m *PollResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PollResponse.Marshal(b, m, deterministic) +} +func (m *PollResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_PollResponse.Merge(m, src) +} +func (m *PollResponse) XXX_Size() int { + return xxx_messageInfo_PollResponse.Size(m) +} +func (m *PollResponse) XXX_DiscardUnknown() { + xxx_messageInfo_PollResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_PollResponse proto.InternalMessageInfo + +type isPollResponse_Result interface { + isPollResponse_Result() +} + +type PollResponse_ErrorNumber struct { + ErrorNumber uint32 `protobuf:"varint,1,opt,name=error_number,json=errorNumber,proto3,oneof"` +} + +type PollResponse_Events struct { + Events uint32 `protobuf:"varint,2,opt,name=events,proto3,oneof"` +} + +func (*PollResponse_ErrorNumber) isPollResponse_Result() {} + +func (*PollResponse_Events) isPollResponse_Result() {} + +func (m *PollResponse) GetResult() isPollResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *PollResponse) GetErrorNumber() uint32 { + if x, ok := m.GetResult().(*PollResponse_ErrorNumber); ok { + return x.ErrorNumber + } + return 0 +} + +func (m *PollResponse) GetEvents() uint32 { + if x, ok := m.GetResult().(*PollResponse_Events); ok { + return x.Events + } + return 0 +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*PollResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*PollResponse_ErrorNumber)(nil), + (*PollResponse_Events)(nil), + } +} + +type SyscallRequest struct { + // Types that are valid to be assigned to Args: + // *SyscallRequest_Socket + // *SyscallRequest_Sendmsg + // *SyscallRequest_Recvmsg + // *SyscallRequest_Bind + // *SyscallRequest_Accept + // *SyscallRequest_Connect + // *SyscallRequest_Listen + // *SyscallRequest_Shutdown + // *SyscallRequest_Close + // *SyscallRequest_GetSockOpt + // *SyscallRequest_SetSockOpt + // *SyscallRequest_GetSockName + // *SyscallRequest_GetPeerName + // *SyscallRequest_EpollWait + // *SyscallRequest_EpollCtl + // *SyscallRequest_EpollCreate1 + // *SyscallRequest_Poll + // *SyscallRequest_Read + // *SyscallRequest_Write + // *SyscallRequest_Open + // *SyscallRequest_Ioctl + // *SyscallRequest_WriteFile + // *SyscallRequest_ReadFile + Args isSyscallRequest_Args `protobuf_oneof:"args"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SyscallRequest) Reset() { *m = SyscallRequest{} } +func (m *SyscallRequest) String() string { return proto.CompactTextString(m) } +func (*SyscallRequest) ProtoMessage() {} +func (*SyscallRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{49} +} + +func (m *SyscallRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SyscallRequest.Unmarshal(m, b) +} +func (m *SyscallRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SyscallRequest.Marshal(b, m, deterministic) +} +func (m *SyscallRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_SyscallRequest.Merge(m, src) +} +func (m *SyscallRequest) XXX_Size() int { + return xxx_messageInfo_SyscallRequest.Size(m) +} +func (m *SyscallRequest) XXX_DiscardUnknown() { + xxx_messageInfo_SyscallRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_SyscallRequest proto.InternalMessageInfo + +type isSyscallRequest_Args interface { + isSyscallRequest_Args() +} + +type SyscallRequest_Socket struct { + Socket *SocketRequest `protobuf:"bytes,1,opt,name=socket,proto3,oneof"` +} + +type SyscallRequest_Sendmsg struct { + Sendmsg *SendmsgRequest `protobuf:"bytes,2,opt,name=sendmsg,proto3,oneof"` +} + +type SyscallRequest_Recvmsg struct { + Recvmsg *RecvmsgRequest `protobuf:"bytes,3,opt,name=recvmsg,proto3,oneof"` +} + +type SyscallRequest_Bind struct { + Bind *BindRequest `protobuf:"bytes,4,opt,name=bind,proto3,oneof"` +} + +type SyscallRequest_Accept struct { + Accept *AcceptRequest `protobuf:"bytes,5,opt,name=accept,proto3,oneof"` +} + +type SyscallRequest_Connect struct { + Connect *ConnectRequest `protobuf:"bytes,6,opt,name=connect,proto3,oneof"` +} + +type SyscallRequest_Listen struct { + Listen *ListenRequest `protobuf:"bytes,7,opt,name=listen,proto3,oneof"` +} + +type SyscallRequest_Shutdown struct { + Shutdown *ShutdownRequest `protobuf:"bytes,8,opt,name=shutdown,proto3,oneof"` +} + +type SyscallRequest_Close struct { + Close *CloseRequest `protobuf:"bytes,9,opt,name=close,proto3,oneof"` +} + +type SyscallRequest_GetSockOpt struct { + GetSockOpt *GetSockOptRequest `protobuf:"bytes,10,opt,name=get_sock_opt,json=getSockOpt,proto3,oneof"` +} + +type SyscallRequest_SetSockOpt struct { + SetSockOpt *SetSockOptRequest `protobuf:"bytes,11,opt,name=set_sock_opt,json=setSockOpt,proto3,oneof"` +} + +type SyscallRequest_GetSockName struct { + GetSockName *GetSockNameRequest `protobuf:"bytes,12,opt,name=get_sock_name,json=getSockName,proto3,oneof"` +} + +type SyscallRequest_GetPeerName struct { + GetPeerName *GetPeerNameRequest `protobuf:"bytes,13,opt,name=get_peer_name,json=getPeerName,proto3,oneof"` +} + +type SyscallRequest_EpollWait struct { + EpollWait *EpollWaitRequest `protobuf:"bytes,14,opt,name=epoll_wait,json=epollWait,proto3,oneof"` +} + +type SyscallRequest_EpollCtl struct { + EpollCtl *EpollCtlRequest `protobuf:"bytes,15,opt,name=epoll_ctl,json=epollCtl,proto3,oneof"` +} + +type SyscallRequest_EpollCreate1 struct { + EpollCreate1 *EpollCreate1Request `protobuf:"bytes,16,opt,name=epoll_create1,json=epollCreate1,proto3,oneof"` +} + +type SyscallRequest_Poll struct { + Poll *PollRequest `protobuf:"bytes,17,opt,name=poll,proto3,oneof"` +} + +type SyscallRequest_Read struct { + Read *ReadRequest `protobuf:"bytes,18,opt,name=read,proto3,oneof"` +} + +type SyscallRequest_Write struct { + Write *WriteRequest `protobuf:"bytes,19,opt,name=write,proto3,oneof"` +} + +type SyscallRequest_Open struct { + Open *OpenRequest `protobuf:"bytes,20,opt,name=open,proto3,oneof"` +} + +type SyscallRequest_Ioctl struct { + Ioctl *IOCtlRequest `protobuf:"bytes,21,opt,name=ioctl,proto3,oneof"` +} + +type SyscallRequest_WriteFile struct { + WriteFile *WriteFileRequest `protobuf:"bytes,22,opt,name=write_file,json=writeFile,proto3,oneof"` +} + +type SyscallRequest_ReadFile struct { + ReadFile *ReadFileRequest `protobuf:"bytes,23,opt,name=read_file,json=readFile,proto3,oneof"` +} + +func (*SyscallRequest_Socket) isSyscallRequest_Args() {} + +func (*SyscallRequest_Sendmsg) isSyscallRequest_Args() {} + +func (*SyscallRequest_Recvmsg) isSyscallRequest_Args() {} + +func (*SyscallRequest_Bind) isSyscallRequest_Args() {} + +func (*SyscallRequest_Accept) isSyscallRequest_Args() {} + +func (*SyscallRequest_Connect) isSyscallRequest_Args() {} + +func (*SyscallRequest_Listen) isSyscallRequest_Args() {} + +func (*SyscallRequest_Shutdown) isSyscallRequest_Args() {} + +func (*SyscallRequest_Close) isSyscallRequest_Args() {} + +func (*SyscallRequest_GetSockOpt) isSyscallRequest_Args() {} + +func (*SyscallRequest_SetSockOpt) isSyscallRequest_Args() {} + +func (*SyscallRequest_GetSockName) isSyscallRequest_Args() {} + +func (*SyscallRequest_GetPeerName) isSyscallRequest_Args() {} + +func (*SyscallRequest_EpollWait) isSyscallRequest_Args() {} + +func (*SyscallRequest_EpollCtl) isSyscallRequest_Args() {} + +func (*SyscallRequest_EpollCreate1) isSyscallRequest_Args() {} + +func (*SyscallRequest_Poll) isSyscallRequest_Args() {} + +func (*SyscallRequest_Read) isSyscallRequest_Args() {} + +func (*SyscallRequest_Write) isSyscallRequest_Args() {} + +func (*SyscallRequest_Open) isSyscallRequest_Args() {} + +func (*SyscallRequest_Ioctl) isSyscallRequest_Args() {} + +func (*SyscallRequest_WriteFile) isSyscallRequest_Args() {} + +func (*SyscallRequest_ReadFile) isSyscallRequest_Args() {} + +func (m *SyscallRequest) GetArgs() isSyscallRequest_Args { + if m != nil { + return m.Args + } + return nil +} + +func (m *SyscallRequest) GetSocket() *SocketRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Socket); ok { + return x.Socket + } + return nil +} + +func (m *SyscallRequest) GetSendmsg() *SendmsgRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Sendmsg); ok { + return x.Sendmsg + } + return nil +} + +func (m *SyscallRequest) GetRecvmsg() *RecvmsgRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Recvmsg); ok { + return x.Recvmsg + } + return nil +} + +func (m *SyscallRequest) GetBind() *BindRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Bind); ok { + return x.Bind + } + return nil +} + +func (m *SyscallRequest) GetAccept() *AcceptRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Accept); ok { + return x.Accept + } + return nil +} + +func (m *SyscallRequest) GetConnect() *ConnectRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Connect); ok { + return x.Connect + } + return nil +} + +func (m *SyscallRequest) GetListen() *ListenRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Listen); ok { + return x.Listen + } + return nil +} + +func (m *SyscallRequest) GetShutdown() *ShutdownRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Shutdown); ok { + return x.Shutdown + } + return nil +} + +func (m *SyscallRequest) GetClose() *CloseRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Close); ok { + return x.Close + } + return nil +} + +func (m *SyscallRequest) GetGetSockOpt() *GetSockOptRequest { + if x, ok := m.GetArgs().(*SyscallRequest_GetSockOpt); ok { + return x.GetSockOpt + } + return nil +} + +func (m *SyscallRequest) GetSetSockOpt() *SetSockOptRequest { + if x, ok := m.GetArgs().(*SyscallRequest_SetSockOpt); ok { + return x.SetSockOpt + } + return nil +} + +func (m *SyscallRequest) GetGetSockName() *GetSockNameRequest { + if x, ok := m.GetArgs().(*SyscallRequest_GetSockName); ok { + return x.GetSockName + } + return nil +} + +func (m *SyscallRequest) GetGetPeerName() *GetPeerNameRequest { + if x, ok := m.GetArgs().(*SyscallRequest_GetPeerName); ok { + return x.GetPeerName + } + return nil +} + +func (m *SyscallRequest) GetEpollWait() *EpollWaitRequest { + if x, ok := m.GetArgs().(*SyscallRequest_EpollWait); ok { + return x.EpollWait + } + return nil +} + +func (m *SyscallRequest) GetEpollCtl() *EpollCtlRequest { + if x, ok := m.GetArgs().(*SyscallRequest_EpollCtl); ok { + return x.EpollCtl + } + return nil +} + +func (m *SyscallRequest) GetEpollCreate1() *EpollCreate1Request { + if x, ok := m.GetArgs().(*SyscallRequest_EpollCreate1); ok { + return x.EpollCreate1 + } + return nil +} + +func (m *SyscallRequest) GetPoll() *PollRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Poll); ok { + return x.Poll + } + return nil +} + +func (m *SyscallRequest) GetRead() *ReadRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Read); ok { + return x.Read + } + return nil +} + +func (m *SyscallRequest) GetWrite() *WriteRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Write); ok { + return x.Write + } + return nil +} + +func (m *SyscallRequest) GetOpen() *OpenRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Open); ok { + return x.Open + } + return nil +} + +func (m *SyscallRequest) GetIoctl() *IOCtlRequest { + if x, ok := m.GetArgs().(*SyscallRequest_Ioctl); ok { + return x.Ioctl + } + return nil +} + +func (m *SyscallRequest) GetWriteFile() *WriteFileRequest { + if x, ok := m.GetArgs().(*SyscallRequest_WriteFile); ok { + return x.WriteFile + } + return nil +} + +func (m *SyscallRequest) GetReadFile() *ReadFileRequest { + if x, ok := m.GetArgs().(*SyscallRequest_ReadFile); ok { + return x.ReadFile + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*SyscallRequest) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*SyscallRequest_Socket)(nil), + (*SyscallRequest_Sendmsg)(nil), + (*SyscallRequest_Recvmsg)(nil), + (*SyscallRequest_Bind)(nil), + (*SyscallRequest_Accept)(nil), + (*SyscallRequest_Connect)(nil), + (*SyscallRequest_Listen)(nil), + (*SyscallRequest_Shutdown)(nil), + (*SyscallRequest_Close)(nil), + (*SyscallRequest_GetSockOpt)(nil), + (*SyscallRequest_SetSockOpt)(nil), + (*SyscallRequest_GetSockName)(nil), + (*SyscallRequest_GetPeerName)(nil), + (*SyscallRequest_EpollWait)(nil), + (*SyscallRequest_EpollCtl)(nil), + (*SyscallRequest_EpollCreate1)(nil), + (*SyscallRequest_Poll)(nil), + (*SyscallRequest_Read)(nil), + (*SyscallRequest_Write)(nil), + (*SyscallRequest_Open)(nil), + (*SyscallRequest_Ioctl)(nil), + (*SyscallRequest_WriteFile)(nil), + (*SyscallRequest_ReadFile)(nil), + } +} + +type SyscallResponse struct { + // Types that are valid to be assigned to Result: + // *SyscallResponse_Socket + // *SyscallResponse_Sendmsg + // *SyscallResponse_Recvmsg + // *SyscallResponse_Bind + // *SyscallResponse_Accept + // *SyscallResponse_Connect + // *SyscallResponse_Listen + // *SyscallResponse_Shutdown + // *SyscallResponse_Close + // *SyscallResponse_GetSockOpt + // *SyscallResponse_SetSockOpt + // *SyscallResponse_GetSockName + // *SyscallResponse_GetPeerName + // *SyscallResponse_EpollWait + // *SyscallResponse_EpollCtl + // *SyscallResponse_EpollCreate1 + // *SyscallResponse_Poll + // *SyscallResponse_Read + // *SyscallResponse_Write + // *SyscallResponse_Open + // *SyscallResponse_Ioctl + // *SyscallResponse_WriteFile + // *SyscallResponse_ReadFile + Result isSyscallResponse_Result `protobuf_oneof:"result"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SyscallResponse) Reset() { *m = SyscallResponse{} } +func (m *SyscallResponse) String() string { return proto.CompactTextString(m) } +func (*SyscallResponse) ProtoMessage() {} +func (*SyscallResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_dd04f3a8f0c5288b, []int{50} +} + +func (m *SyscallResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SyscallResponse.Unmarshal(m, b) +} +func (m *SyscallResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SyscallResponse.Marshal(b, m, deterministic) +} +func (m *SyscallResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_SyscallResponse.Merge(m, src) +} +func (m *SyscallResponse) XXX_Size() int { + return xxx_messageInfo_SyscallResponse.Size(m) +} +func (m *SyscallResponse) XXX_DiscardUnknown() { + xxx_messageInfo_SyscallResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_SyscallResponse proto.InternalMessageInfo + +type isSyscallResponse_Result interface { + isSyscallResponse_Result() +} + +type SyscallResponse_Socket struct { + Socket *SocketResponse `protobuf:"bytes,1,opt,name=socket,proto3,oneof"` +} + +type SyscallResponse_Sendmsg struct { + Sendmsg *SendmsgResponse `protobuf:"bytes,2,opt,name=sendmsg,proto3,oneof"` +} + +type SyscallResponse_Recvmsg struct { + Recvmsg *RecvmsgResponse `protobuf:"bytes,3,opt,name=recvmsg,proto3,oneof"` +} + +type SyscallResponse_Bind struct { + Bind *BindResponse `protobuf:"bytes,4,opt,name=bind,proto3,oneof"` +} + +type SyscallResponse_Accept struct { + Accept *AcceptResponse `protobuf:"bytes,5,opt,name=accept,proto3,oneof"` +} + +type SyscallResponse_Connect struct { + Connect *ConnectResponse `protobuf:"bytes,6,opt,name=connect,proto3,oneof"` +} + +type SyscallResponse_Listen struct { + Listen *ListenResponse `protobuf:"bytes,7,opt,name=listen,proto3,oneof"` +} + +type SyscallResponse_Shutdown struct { + Shutdown *ShutdownResponse `protobuf:"bytes,8,opt,name=shutdown,proto3,oneof"` +} + +type SyscallResponse_Close struct { + Close *CloseResponse `protobuf:"bytes,9,opt,name=close,proto3,oneof"` +} + +type SyscallResponse_GetSockOpt struct { + GetSockOpt *GetSockOptResponse `protobuf:"bytes,10,opt,name=get_sock_opt,json=getSockOpt,proto3,oneof"` +} + +type SyscallResponse_SetSockOpt struct { + SetSockOpt *SetSockOptResponse `protobuf:"bytes,11,opt,name=set_sock_opt,json=setSockOpt,proto3,oneof"` +} + +type SyscallResponse_GetSockName struct { + GetSockName *GetSockNameResponse `protobuf:"bytes,12,opt,name=get_sock_name,json=getSockName,proto3,oneof"` +} + +type SyscallResponse_GetPeerName struct { + GetPeerName *GetPeerNameResponse `protobuf:"bytes,13,opt,name=get_peer_name,json=getPeerName,proto3,oneof"` +} + +type SyscallResponse_EpollWait struct { + EpollWait *EpollWaitResponse `protobuf:"bytes,14,opt,name=epoll_wait,json=epollWait,proto3,oneof"` +} + +type SyscallResponse_EpollCtl struct { + EpollCtl *EpollCtlResponse `protobuf:"bytes,15,opt,name=epoll_ctl,json=epollCtl,proto3,oneof"` +} + +type SyscallResponse_EpollCreate1 struct { + EpollCreate1 *EpollCreate1Response `protobuf:"bytes,16,opt,name=epoll_create1,json=epollCreate1,proto3,oneof"` +} + +type SyscallResponse_Poll struct { + Poll *PollResponse `protobuf:"bytes,17,opt,name=poll,proto3,oneof"` +} + +type SyscallResponse_Read struct { + Read *ReadResponse `protobuf:"bytes,18,opt,name=read,proto3,oneof"` +} + +type SyscallResponse_Write struct { + Write *WriteResponse `protobuf:"bytes,19,opt,name=write,proto3,oneof"` +} + +type SyscallResponse_Open struct { + Open *OpenResponse `protobuf:"bytes,20,opt,name=open,proto3,oneof"` +} + +type SyscallResponse_Ioctl struct { + Ioctl *IOCtlResponse `protobuf:"bytes,21,opt,name=ioctl,proto3,oneof"` +} + +type SyscallResponse_WriteFile struct { + WriteFile *WriteFileResponse `protobuf:"bytes,22,opt,name=write_file,json=writeFile,proto3,oneof"` +} + +type SyscallResponse_ReadFile struct { + ReadFile *ReadFileResponse `protobuf:"bytes,23,opt,name=read_file,json=readFile,proto3,oneof"` +} + +func (*SyscallResponse_Socket) isSyscallResponse_Result() {} + +func (*SyscallResponse_Sendmsg) isSyscallResponse_Result() {} + +func (*SyscallResponse_Recvmsg) isSyscallResponse_Result() {} + +func (*SyscallResponse_Bind) isSyscallResponse_Result() {} + +func (*SyscallResponse_Accept) isSyscallResponse_Result() {} + +func (*SyscallResponse_Connect) isSyscallResponse_Result() {} + +func (*SyscallResponse_Listen) isSyscallResponse_Result() {} + +func (*SyscallResponse_Shutdown) isSyscallResponse_Result() {} + +func (*SyscallResponse_Close) isSyscallResponse_Result() {} + +func (*SyscallResponse_GetSockOpt) isSyscallResponse_Result() {} + +func (*SyscallResponse_SetSockOpt) isSyscallResponse_Result() {} + +func (*SyscallResponse_GetSockName) isSyscallResponse_Result() {} + +func (*SyscallResponse_GetPeerName) isSyscallResponse_Result() {} + +func (*SyscallResponse_EpollWait) isSyscallResponse_Result() {} + +func (*SyscallResponse_EpollCtl) isSyscallResponse_Result() {} + +func (*SyscallResponse_EpollCreate1) isSyscallResponse_Result() {} + +func (*SyscallResponse_Poll) isSyscallResponse_Result() {} + +func (*SyscallResponse_Read) isSyscallResponse_Result() {} + +func (*SyscallResponse_Write) isSyscallResponse_Result() {} + +func (*SyscallResponse_Open) isSyscallResponse_Result() {} + +func (*SyscallResponse_Ioctl) isSyscallResponse_Result() {} + +func (*SyscallResponse_WriteFile) isSyscallResponse_Result() {} + +func (*SyscallResponse_ReadFile) isSyscallResponse_Result() {} + +func (m *SyscallResponse) GetResult() isSyscallResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *SyscallResponse) GetSocket() *SocketResponse { + if x, ok := m.GetResult().(*SyscallResponse_Socket); ok { + return x.Socket + } + return nil +} + +func (m *SyscallResponse) GetSendmsg() *SendmsgResponse { + if x, ok := m.GetResult().(*SyscallResponse_Sendmsg); ok { + return x.Sendmsg + } + return nil +} + +func (m *SyscallResponse) GetRecvmsg() *RecvmsgResponse { + if x, ok := m.GetResult().(*SyscallResponse_Recvmsg); ok { + return x.Recvmsg + } + return nil +} + +func (m *SyscallResponse) GetBind() *BindResponse { + if x, ok := m.GetResult().(*SyscallResponse_Bind); ok { + return x.Bind + } + return nil +} + +func (m *SyscallResponse) GetAccept() *AcceptResponse { + if x, ok := m.GetResult().(*SyscallResponse_Accept); ok { + return x.Accept + } + return nil +} + +func (m *SyscallResponse) GetConnect() *ConnectResponse { + if x, ok := m.GetResult().(*SyscallResponse_Connect); ok { + return x.Connect + } + return nil +} + +func (m *SyscallResponse) GetListen() *ListenResponse { + if x, ok := m.GetResult().(*SyscallResponse_Listen); ok { + return x.Listen + } + return nil +} + +func (m *SyscallResponse) GetShutdown() *ShutdownResponse { + if x, ok := m.GetResult().(*SyscallResponse_Shutdown); ok { + return x.Shutdown + } + return nil +} + +func (m *SyscallResponse) GetClose() *CloseResponse { + if x, ok := m.GetResult().(*SyscallResponse_Close); ok { + return x.Close + } + return nil +} + +func (m *SyscallResponse) GetGetSockOpt() *GetSockOptResponse { + if x, ok := m.GetResult().(*SyscallResponse_GetSockOpt); ok { + return x.GetSockOpt + } + return nil +} + +func (m *SyscallResponse) GetSetSockOpt() *SetSockOptResponse { + if x, ok := m.GetResult().(*SyscallResponse_SetSockOpt); ok { + return x.SetSockOpt + } + return nil +} + +func (m *SyscallResponse) GetGetSockName() *GetSockNameResponse { + if x, ok := m.GetResult().(*SyscallResponse_GetSockName); ok { + return x.GetSockName + } + return nil +} + +func (m *SyscallResponse) GetGetPeerName() *GetPeerNameResponse { + if x, ok := m.GetResult().(*SyscallResponse_GetPeerName); ok { + return x.GetPeerName + } + return nil +} + +func (m *SyscallResponse) GetEpollWait() *EpollWaitResponse { + if x, ok := m.GetResult().(*SyscallResponse_EpollWait); ok { + return x.EpollWait + } + return nil +} + +func (m *SyscallResponse) GetEpollCtl() *EpollCtlResponse { + if x, ok := m.GetResult().(*SyscallResponse_EpollCtl); ok { + return x.EpollCtl + } + return nil +} + +func (m *SyscallResponse) GetEpollCreate1() *EpollCreate1Response { + if x, ok := m.GetResult().(*SyscallResponse_EpollCreate1); ok { + return x.EpollCreate1 + } + return nil +} + +func (m *SyscallResponse) GetPoll() *PollResponse { + if x, ok := m.GetResult().(*SyscallResponse_Poll); ok { + return x.Poll + } + return nil +} + +func (m *SyscallResponse) GetRead() *ReadResponse { + if x, ok := m.GetResult().(*SyscallResponse_Read); ok { + return x.Read + } + return nil +} + +func (m *SyscallResponse) GetWrite() *WriteResponse { + if x, ok := m.GetResult().(*SyscallResponse_Write); ok { + return x.Write + } + return nil +} + +func (m *SyscallResponse) GetOpen() *OpenResponse { + if x, ok := m.GetResult().(*SyscallResponse_Open); ok { + return x.Open + } + return nil +} + +func (m *SyscallResponse) GetIoctl() *IOCtlResponse { + if x, ok := m.GetResult().(*SyscallResponse_Ioctl); ok { + return x.Ioctl + } + return nil +} + +func (m *SyscallResponse) GetWriteFile() *WriteFileResponse { + if x, ok := m.GetResult().(*SyscallResponse_WriteFile); ok { + return x.WriteFile + } + return nil +} + +func (m *SyscallResponse) GetReadFile() *ReadFileResponse { + if x, ok := m.GetResult().(*SyscallResponse_ReadFile); ok { + return x.ReadFile + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*SyscallResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*SyscallResponse_Socket)(nil), + (*SyscallResponse_Sendmsg)(nil), + (*SyscallResponse_Recvmsg)(nil), + (*SyscallResponse_Bind)(nil), + (*SyscallResponse_Accept)(nil), + (*SyscallResponse_Connect)(nil), + (*SyscallResponse_Listen)(nil), + (*SyscallResponse_Shutdown)(nil), + (*SyscallResponse_Close)(nil), + (*SyscallResponse_GetSockOpt)(nil), + (*SyscallResponse_SetSockOpt)(nil), + (*SyscallResponse_GetSockName)(nil), + (*SyscallResponse_GetPeerName)(nil), + (*SyscallResponse_EpollWait)(nil), + (*SyscallResponse_EpollCtl)(nil), + (*SyscallResponse_EpollCreate1)(nil), + (*SyscallResponse_Poll)(nil), + (*SyscallResponse_Read)(nil), + (*SyscallResponse_Write)(nil), + (*SyscallResponse_Open)(nil), + (*SyscallResponse_Ioctl)(nil), + (*SyscallResponse_WriteFile)(nil), + (*SyscallResponse_ReadFile)(nil), + } +} + +func init() { + proto.RegisterType((*SendmsgRequest)(nil), "syscall_rpc.SendmsgRequest") + proto.RegisterType((*SendmsgResponse)(nil), "syscall_rpc.SendmsgResponse") + proto.RegisterType((*IOCtlRequest)(nil), "syscall_rpc.IOCtlRequest") + proto.RegisterType((*IOCtlResponse)(nil), "syscall_rpc.IOCtlResponse") + proto.RegisterType((*RecvmsgRequest)(nil), "syscall_rpc.RecvmsgRequest") + proto.RegisterType((*OpenRequest)(nil), "syscall_rpc.OpenRequest") + proto.RegisterType((*OpenResponse)(nil), "syscall_rpc.OpenResponse") + proto.RegisterType((*ReadRequest)(nil), "syscall_rpc.ReadRequest") + proto.RegisterType((*ReadResponse)(nil), "syscall_rpc.ReadResponse") + proto.RegisterType((*ReadFileRequest)(nil), "syscall_rpc.ReadFileRequest") + proto.RegisterType((*ReadFileResponse)(nil), "syscall_rpc.ReadFileResponse") + proto.RegisterType((*WriteRequest)(nil), "syscall_rpc.WriteRequest") + proto.RegisterType((*WriteResponse)(nil), "syscall_rpc.WriteResponse") + proto.RegisterType((*WriteFileRequest)(nil), "syscall_rpc.WriteFileRequest") + proto.RegisterType((*WriteFileResponse)(nil), "syscall_rpc.WriteFileResponse") + proto.RegisterType((*AddressResponse)(nil), "syscall_rpc.AddressResponse") + proto.RegisterType((*RecvmsgResponse)(nil), "syscall_rpc.RecvmsgResponse") + proto.RegisterType((*RecvmsgResponse_ResultPayload)(nil), "syscall_rpc.RecvmsgResponse.ResultPayload") + proto.RegisterType((*BindRequest)(nil), "syscall_rpc.BindRequest") + proto.RegisterType((*BindResponse)(nil), "syscall_rpc.BindResponse") + proto.RegisterType((*AcceptRequest)(nil), "syscall_rpc.AcceptRequest") + proto.RegisterType((*AcceptResponse)(nil), "syscall_rpc.AcceptResponse") + proto.RegisterType((*AcceptResponse_ResultPayload)(nil), "syscall_rpc.AcceptResponse.ResultPayload") + proto.RegisterType((*ConnectRequest)(nil), "syscall_rpc.ConnectRequest") + proto.RegisterType((*ConnectResponse)(nil), "syscall_rpc.ConnectResponse") + proto.RegisterType((*ListenRequest)(nil), "syscall_rpc.ListenRequest") + proto.RegisterType((*ListenResponse)(nil), "syscall_rpc.ListenResponse") + proto.RegisterType((*ShutdownRequest)(nil), "syscall_rpc.ShutdownRequest") + proto.RegisterType((*ShutdownResponse)(nil), "syscall_rpc.ShutdownResponse") + proto.RegisterType((*CloseRequest)(nil), "syscall_rpc.CloseRequest") + proto.RegisterType((*CloseResponse)(nil), "syscall_rpc.CloseResponse") + proto.RegisterType((*GetSockOptRequest)(nil), "syscall_rpc.GetSockOptRequest") + proto.RegisterType((*GetSockOptResponse)(nil), "syscall_rpc.GetSockOptResponse") + proto.RegisterType((*SetSockOptRequest)(nil), "syscall_rpc.SetSockOptRequest") + proto.RegisterType((*SetSockOptResponse)(nil), "syscall_rpc.SetSockOptResponse") + proto.RegisterType((*GetSockNameRequest)(nil), "syscall_rpc.GetSockNameRequest") + proto.RegisterType((*GetSockNameResponse)(nil), "syscall_rpc.GetSockNameResponse") + proto.RegisterType((*GetPeerNameRequest)(nil), "syscall_rpc.GetPeerNameRequest") + proto.RegisterType((*GetPeerNameResponse)(nil), "syscall_rpc.GetPeerNameResponse") + proto.RegisterType((*SocketRequest)(nil), "syscall_rpc.SocketRequest") + proto.RegisterType((*SocketResponse)(nil), "syscall_rpc.SocketResponse") + proto.RegisterType((*EpollWaitRequest)(nil), "syscall_rpc.EpollWaitRequest") + proto.RegisterType((*EpollEvent)(nil), "syscall_rpc.EpollEvent") + proto.RegisterType((*EpollEvents)(nil), "syscall_rpc.EpollEvents") + proto.RegisterType((*EpollWaitResponse)(nil), "syscall_rpc.EpollWaitResponse") + proto.RegisterType((*EpollCtlRequest)(nil), "syscall_rpc.EpollCtlRequest") + proto.RegisterType((*EpollCtlResponse)(nil), "syscall_rpc.EpollCtlResponse") + proto.RegisterType((*EpollCreate1Request)(nil), "syscall_rpc.EpollCreate1Request") + proto.RegisterType((*EpollCreate1Response)(nil), "syscall_rpc.EpollCreate1Response") + proto.RegisterType((*PollRequest)(nil), "syscall_rpc.PollRequest") + proto.RegisterType((*PollResponse)(nil), "syscall_rpc.PollResponse") + proto.RegisterType((*SyscallRequest)(nil), "syscall_rpc.SyscallRequest") + proto.RegisterType((*SyscallResponse)(nil), "syscall_rpc.SyscallResponse") +} + +func init() { + proto.RegisterFile("pkg/sentry/socket/rpcinet/syscall_rpc.proto", fileDescriptor_dd04f3a8f0c5288b) +} + +var fileDescriptor_dd04f3a8f0c5288b = []byte{ + // 1838 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xbc, 0x98, 0xdd, 0x52, 0xe3, 0xc8, + 0x15, 0x80, 0x6d, 0x6c, 0xc0, 0x1c, 0xff, 0xa2, 0x21, 0xac, 0xf8, 0x67, 0x95, 0xa4, 0x8a, 0x4d, + 0x2a, 0xb8, 0xc6, 0x33, 0xec, 0x90, 0xdd, 0xad, 0xdd, 0x2c, 0x04, 0xd6, 0x53, 0x99, 0x1a, 0x88, + 0x5c, 0x09, 0xa9, 0xe4, 0xc2, 0x25, 0xa4, 0xb6, 0x71, 0x21, 0x4b, 0x8a, 0x24, 0x43, 0x71, 0x93, + 0x07, 0xc8, 0x75, 0xee, 0x72, 0x91, 0x87, 0xca, 0x03, 0xe4, 0x09, 0xf2, 0x0e, 0xa9, 0xd3, 0xdd, + 0x92, 0xba, 0x45, 0x8b, 0xc1, 0x29, 0x6a, 0xef, 0xd4, 0xad, 0xf3, 0xd7, 0xe7, 0x74, 0x7f, 0x3a, + 0x2d, 0xf8, 0x65, 0x70, 0x3b, 0xee, 0x46, 0xc4, 0x8b, 0xc3, 0x87, 0x6e, 0xe4, 0xdb, 0xb7, 0x24, + 0xee, 0x86, 0x81, 0x3d, 0xf1, 0x48, 0xdc, 0x8d, 0x1e, 0x22, 0xdb, 0x72, 0xdd, 0x61, 0x18, 0xd8, + 0x87, 0x41, 0xe8, 0xc7, 0xbe, 0x56, 0x17, 0xa6, 0x8c, 0xbf, 0x97, 0xa1, 0x35, 0x20, 0x9e, 0x33, + 0x8d, 0xc6, 0x26, 0xf9, 0xeb, 0x8c, 0x44, 0xb1, 0xd6, 0x82, 0x85, 0x91, 0xa3, 0x97, 0xf7, 0xcb, + 0x07, 0x4d, 0x73, 0x61, 0xe4, 0x68, 0xeb, 0x50, 0x75, 0xac, 0xd8, 0xd2, 0x17, 0xf6, 0xcb, 0x07, + 0x8d, 0x93, 0x85, 0x5a, 0xd9, 0xa4, 0x63, 0x4d, 0x87, 0x65, 0xcb, 0x71, 0x42, 0x12, 0x45, 0x7a, + 0x05, 0x5f, 0x99, 0xc9, 0x50, 0xd3, 0xa0, 0x3a, 0xf5, 0x43, 0xa2, 0x57, 0xf7, 0xcb, 0x07, 0x35, + 0x93, 0x3e, 0x6b, 0x06, 0x34, 0x89, 0xe7, 0x0c, 0xfd, 0xd1, 0x30, 0x24, 0xb6, 0x1f, 0x3a, 0xfa, + 0x22, 0x7d, 0x59, 0x27, 0x9e, 0x73, 0x31, 0x32, 0xe9, 0x94, 0xf1, 0x67, 0x68, 0xa7, 0xb1, 0x44, + 0x81, 0xef, 0x45, 0x44, 0xfb, 0x29, 0x34, 0x48, 0x18, 0xfa, 0xe1, 0xd0, 0x9b, 0x4d, 0xaf, 0x49, + 0xc8, 0xc2, 0xea, 0x97, 0xcc, 0x3a, 0x9d, 0xfd, 0x48, 0x27, 0x35, 0x1d, 0x96, 0x5c, 0xe2, 0x8d, + 0xe3, 0x1b, 0x1a, 0x23, 0xbe, 0xe6, 0xe3, 0x93, 0x1a, 0x2c, 0x85, 0x24, 0x9a, 0xb9, 0xb1, 0x71, + 0x02, 0x8d, 0xf7, 0x17, 0xa7, 0xb1, 0x5b, 0xb4, 0xca, 0x0e, 0x54, 0xec, 0xa9, 0xc3, 0x0c, 0x98, + 0xf8, 0x88, 0x33, 0x56, 0x38, 0xe6, 0x6b, 0xc3, 0x47, 0xe3, 0x8f, 0xd0, 0xe4, 0x36, 0xe6, 0x89, + 0x6e, 0x1d, 0x16, 0xef, 0x2c, 0x77, 0x46, 0x58, 0x02, 0xfb, 0x25, 0x93, 0x0d, 0x85, 0xd8, 0xfe, + 0x59, 0x86, 0x96, 0x49, 0xec, 0xbb, 0x27, 0x8b, 0x20, 0x2d, 0x31, 0x59, 0x20, 0xce, 0x47, 0xc4, + 0x73, 0x48, 0x48, 0xe3, 0xac, 0x99, 0x7c, 0x84, 0x25, 0x08, 0x08, 0xb9, 0x4d, 0x4a, 0x80, 0xcf, + 0xda, 0x1a, 0x2c, 0xc6, 0xe1, 0xcc, 0xb3, 0x79, 0xea, 0xd9, 0x40, 0xdb, 0x83, 0xba, 0x3d, 0x8d, + 0xc6, 0x43, 0x6e, 0x7e, 0x89, 0x9a, 0x07, 0x9c, 0xfa, 0x40, 0x67, 0x8c, 0xdf, 0x41, 0xfd, 0x22, + 0x20, 0x5e, 0x12, 0x19, 0x5a, 0xb6, 0xe2, 0x1b, 0x1a, 0x5b, 0xc3, 0xa4, 0xcf, 0x68, 0x79, 0xe4, + 0x5a, 0xe3, 0x88, 0x07, 0xc7, 0x06, 0x6c, 0x1b, 0x38, 0x84, 0x46, 0xd6, 0x34, 0xe9, 0xb3, 0x71, + 0x01, 0x0d, 0x66, 0x6c, 0x9e, 0x0c, 0x76, 0x68, 0x32, 0x92, 0xda, 0x2e, 0x8c, 0x1c, 0x21, 0x77, + 0x47, 0x50, 0x37, 0x89, 0xe5, 0xcc, 0x99, 0x37, 0xe3, 0x0a, 0x1a, 0x4c, 0x6d, 0xbe, 0x7d, 0x96, + 0x3b, 0x09, 0xfd, 0x12, 0x3b, 0x0b, 0x42, 0x3c, 0x3f, 0x87, 0x36, 0x1a, 0x3e, 0x9f, 0xb8, 0x44, + 0x95, 0xb1, 0x15, 0x96, 0x31, 0xe3, 0x2f, 0xd0, 0xc9, 0xc4, 0x5e, 0x3a, 0x86, 0x2f, 0xa1, 0x71, + 0x15, 0x4e, 0x62, 0x32, 0xe7, 0x89, 0x36, 0xfe, 0x04, 0x4d, 0xae, 0xf7, 0xd2, 0xa7, 0xef, 0x37, + 0xd0, 0xa1, 0x96, 0x3f, 0x91, 0x16, 0x64, 0x8a, 0xed, 0x7b, 0x31, 0xf1, 0x62, 0x16, 0x9c, 0x99, + 0x0c, 0x8d, 0x4b, 0x58, 0x15, 0x2c, 0xf0, 0xf8, 0x3e, 0x57, 0xc5, 0x97, 0x8f, 0x6e, 0xf9, 0x3e, + 0x9c, 0xc4, 0x31, 0xf1, 0xf8, 0x0e, 0x48, 0x86, 0xc6, 0x29, 0xb4, 0xbf, 0x67, 0xc0, 0x4a, 0xed, + 0x09, 0x48, 0x2b, 0xcb, 0x48, 0x2b, 0xda, 0x47, 0xff, 0x5a, 0xc0, 0x7a, 0xf3, 0xa3, 0x3b, 0x4f, + 0xd6, 0xce, 0x61, 0x39, 0xb0, 0x1e, 0x5c, 0xdf, 0x62, 0x1b, 0xbb, 0xde, 0xfb, 0xc5, 0xa1, 0x88, + 0xea, 0x9c, 0xcd, 0x43, 0x93, 0xe6, 0xf1, 0x92, 0x69, 0xf4, 0x4b, 0x66, 0xa2, 0xbc, 0xf9, 0x8f, + 0x32, 0x34, 0xa5, 0x97, 0x69, 0x75, 0xcb, 0x39, 0x5e, 0x7f, 0x99, 0x2d, 0x8e, 0x79, 0xdc, 0x96, + 0x3c, 0xe6, 0x72, 0xa1, 0x5a, 0x7a, 0x45, 0x42, 0xcf, 0x16, 0xac, 0x50, 0x70, 0x50, 0x67, 0x55, + 0x9a, 0xae, 0x1a, 0x4e, 0xfc, 0x56, 0xde, 0x8c, 0xef, 0xa0, 0x7e, 0x32, 0xf1, 0x0a, 0x0f, 0xa8, + 0x2e, 0x47, 0x95, 0xa5, 0xdc, 0x78, 0x0d, 0x0d, 0xa6, 0xf8, 0xec, 0x62, 0x1b, 0xef, 0xa1, 0xf9, + 0xbd, 0x6d, 0x93, 0x20, 0x2e, 0xf2, 0xc6, 0xb0, 0x18, 0x52, 0x57, 0x0c, 0x8b, 0x61, 0x06, 0x2f, + 0x5c, 0x5e, 0x85, 0xc3, 0xcb, 0xf8, 0x4f, 0x19, 0x5a, 0x89, 0xad, 0x79, 0xea, 0x7a, 0x96, 0xaf, + 0xeb, 0x17, 0x72, 0x96, 0x25, 0x93, 0xc5, 0x65, 0xbd, 0xca, 0x57, 0x35, 0xbf, 0x92, 0xff, 0xb3, + 0x9a, 0x42, 0x61, 0xbe, 0x82, 0xd6, 0xa9, 0xef, 0x79, 0xc4, 0x8e, 0xe7, 0xaf, 0xcd, 0x5b, 0x68, + 0xa7, 0xba, 0xcf, 0x2f, 0xcf, 0xaf, 0xa1, 0xf9, 0x61, 0x12, 0xc5, 0xd9, 0xb7, 0x44, 0xe1, 0xf0, + 0xda, 0xb2, 0x6f, 0x5d, 0x7f, 0x4c, 0x1d, 0x56, 0xcc, 0x64, 0x68, 0xbc, 0x81, 0x56, 0xa2, 0xfa, + 0x7c, 0x7f, 0x6f, 0xa0, 0x3d, 0xb8, 0x99, 0xc5, 0x8e, 0x7f, 0xef, 0x3d, 0xf1, 0xd9, 0xbf, 0xf1, + 0xef, 0xb9, 0x37, 0x7c, 0x34, 0x8e, 0xa0, 0x93, 0x29, 0x3d, 0xdf, 0xd7, 0x2e, 0x34, 0x4e, 0x5d, + 0x3f, 0x2a, 0x62, 0xae, 0xd1, 0x83, 0x26, 0x7f, 0xff, 0x7c, 0x9b, 0x04, 0x56, 0x7f, 0x20, 0xf1, + 0xc0, 0xb7, 0x6f, 0x2f, 0x8a, 0xb7, 0xf4, 0x1a, 0x2c, 0xba, 0xe4, 0x8e, 0xb8, 0x7c, 0x0d, 0x6c, + 0x80, 0x1b, 0xdd, 0xb3, 0xa6, 0x84, 0xef, 0x69, 0xfa, 0x2c, 0x1c, 0xe4, 0x6a, 0xee, 0x5b, 0xa8, + 0x89, 0x6e, 0xe6, 0xd9, 0xed, 0x1a, 0x54, 0xfc, 0x20, 0x4e, 0x3b, 0x1b, 0x1c, 0x08, 0x3b, 0x6c, + 0x08, 0xab, 0x83, 0x17, 0x8c, 0xbf, 0xc3, 0x9c, 0x31, 0xd4, 0xe0, 0xa3, 0xf1, 0x0e, 0xb4, 0xc1, + 0xe3, 0xc8, 0x9f, 0x91, 0xd9, 0x9f, 0xa5, 0x4b, 0xfe, 0x68, 0x4d, 0x0b, 0x6b, 0xf6, 0x37, 0x78, + 0x25, 0x49, 0xcd, 0x93, 0x99, 0xe3, 0xb9, 0xce, 0x27, 0x1e, 0xfd, 0xc7, 0x27, 0x94, 0x45, 0x79, + 0x49, 0x48, 0xf8, 0xe9, 0x28, 0x33, 0xa9, 0x1f, 0x3b, 0xca, 0x2b, 0x68, 0x0e, 0xe8, 0x9d, 0x23, + 0x09, 0x70, 0x1d, 0x96, 0x46, 0xd6, 0x74, 0xe2, 0x3e, 0x50, 0x9f, 0x15, 0x93, 0x8f, 0xb0, 0xa6, + 0xf1, 0x43, 0x40, 0x78, 0xa1, 0xe9, 0xb3, 0xb6, 0x09, 0x35, 0x7a, 0x2b, 0xb1, 0x7d, 0x97, 0xd7, + 0x3a, 0x1d, 0x1b, 0xbf, 0x87, 0x56, 0x62, 0xf8, 0xa5, 0xba, 0xc5, 0x3f, 0x40, 0xe7, 0x2c, 0xf0, + 0x5d, 0xf7, 0xca, 0x9a, 0x14, 0x6e, 0xc8, 0x1d, 0x00, 0x6f, 0x36, 0x1d, 0x92, 0x3b, 0xe2, 0xc5, + 0x49, 0x47, 0xbb, 0xe2, 0xcd, 0xa6, 0x67, 0x74, 0x82, 0x76, 0xb5, 0x11, 0xb1, 0x69, 0xb4, 0x9a, + 0x49, 0x9f, 0x8d, 0xb7, 0x00, 0xd4, 0x2c, 0x15, 0x51, 0xf5, 0xa0, 0x92, 0x31, 0x3e, 0x32, 0xbe, + 0x85, 0x7a, 0xa6, 0x15, 0x69, 0xdd, 0x54, 0xac, 0xbc, 0x5f, 0x39, 0xa8, 0xf7, 0x3e, 0x93, 0x4a, + 0x91, 0x49, 0xa6, 0xfa, 0x77, 0xb0, 0x2a, 0x2c, 0x66, 0x9e, 0x14, 0xf5, 0xa4, 0x88, 0xea, 0x3d, + 0xbd, 0xc0, 0x55, 0x84, 0xcd, 0x1c, 0x93, 0x14, 0x92, 0x18, 0x43, 0x9b, 0x8a, 0x08, 0xb7, 0x29, + 0x0d, 0xaa, 0x24, 0x48, 0x17, 0x4d, 0x9f, 0x31, 0x0d, 0x7e, 0xc0, 0x8b, 0xbd, 0xe0, 0x07, 0x3c, + 0x2d, 0x95, 0x34, 0x2d, 0xbf, 0x82, 0x45, 0x6a, 0x9a, 0x1e, 0xe8, 0x27, 0x96, 0xcb, 0xa4, 0x90, + 0xcb, 0x99, 0xd7, 0xe7, 0x9f, 0xf4, 0x2f, 0xe0, 0x15, 0x53, 0x0b, 0x89, 0x15, 0x93, 0xd7, 0x42, + 0xc0, 0xf8, 0x9d, 0xe7, 0x3b, 0x94, 0x3e, 0x1b, 0x57, 0xb0, 0x26, 0x8b, 0xbe, 0xe0, 0x1d, 0xe5, + 0xd2, 0x77, 0xdd, 0x27, 0xee, 0x28, 0xca, 0xfd, 0x71, 0x05, 0x0d, 0xa6, 0x36, 0x67, 0x37, 0x2e, + 0x1a, 0x53, 0x16, 0xf0, 0xdf, 0x00, 0xad, 0x01, 0x4b, 0x76, 0x12, 0xd3, 0x5b, 0x58, 0x62, 0x3f, + 0x0e, 0xa8, 0xd5, 0x7a, 0x6f, 0x53, 0xaa, 0x86, 0x74, 0xbe, 0xd1, 0x24, 0x93, 0xd5, 0xde, 0xc1, + 0x72, 0xc4, 0x2e, 0xec, 0x7c, 0x23, 0x6d, 0xc9, 0x6a, 0xd2, 0x8f, 0x05, 0xa4, 0x07, 0x97, 0x46, + 0xc5, 0x90, 0x75, 0xb8, 0x74, 0x43, 0xe4, 0x15, 0xe5, 0xcb, 0x30, 0x2a, 0x72, 0x69, 0xed, 0x10, + 0xaa, 0xd7, 0x13, 0xcf, 0xe1, 0x7b, 0x46, 0xde, 0xb7, 0x42, 0x9b, 0x89, 0x97, 0x22, 0x94, 0xc3, + 0x75, 0x59, 0xb4, 0xe5, 0xa2, 0x97, 0xde, 0xfc, 0xba, 0xa4, 0x66, 0x11, 0xd7, 0xc5, 0x64, 0x31, + 0x3c, 0x9b, 0xb5, 0x37, 0xf4, 0x3e, 0x9c, 0x0f, 0x4f, 0x6e, 0x9b, 0x30, 0x3c, 0x2e, 0x8d, 0xee, + 0x5c, 0xda, 0xa6, 0xe8, 0xcb, 0x0a, 0x77, 0x52, 0xf3, 0x43, 0xef, 0x49, 0x74, 0x42, 0xfb, 0x0a, + 0x6a, 0x11, 0x6f, 0x39, 0xf4, 0x9a, 0x02, 0xc3, 0xb9, 0x26, 0xa6, 0x5f, 0x32, 0x53, 0x79, 0xed, + 0x35, 0x2c, 0xda, 0xd8, 0x57, 0xe8, 0x2b, 0x54, 0x71, 0x43, 0x0e, 0x54, 0xe8, 0x48, 0xfa, 0x25, + 0x93, 0x49, 0x6a, 0x27, 0xd0, 0x18, 0x93, 0x78, 0x88, 0x35, 0x1c, 0xe2, 0x07, 0x15, 0xa8, 0xe6, + 0xae, 0xa4, 0xf9, 0xa8, 0xef, 0xe8, 0x97, 0x4c, 0x18, 0xa7, 0x93, 0x68, 0x23, 0x12, 0x6d, 0xd4, + 0x15, 0x36, 0x06, 0x2a, 0x1b, 0x51, 0x66, 0xe3, 0x0c, 0x9a, 0x69, 0x1c, 0xf4, 0x63, 0xdf, 0xa0, + 0x46, 0xf6, 0x54, 0x81, 0x08, 0x1f, 0x40, 0xdc, 0xf1, 0xe3, 0x6c, 0x36, 0x31, 0x83, 0xbd, 0x3c, + 0x33, 0xd3, 0x54, 0x9b, 0xc9, 0x7d, 0x47, 0xb9, 0x99, 0x64, 0x56, 0xfb, 0x16, 0x80, 0xe0, 0xe9, + 0x1f, 0xde, 0x5b, 0x93, 0x58, 0x6f, 0x51, 0x1b, 0x3b, 0x8f, 0x99, 0x24, 0x7c, 0x39, 0xfa, 0x25, + 0x73, 0x85, 0x24, 0x73, 0xda, 0xd7, 0xc0, 0x06, 0x43, 0x3b, 0x76, 0xf5, 0xb6, 0xa2, 0x8a, 0x39, + 0x66, 0x62, 0x15, 0x09, 0x9f, 0xd2, 0x7e, 0x80, 0x26, 0x57, 0x66, 0xec, 0xd1, 0x3b, 0xd4, 0xc0, + 0xbe, 0xc2, 0x80, 0xc4, 0xb1, 0x7e, 0xc9, 0x6c, 0x10, 0x61, 0x1a, 0xcf, 0x07, 0x0e, 0xf5, 0x55, + 0xc5, 0xf9, 0x10, 0x18, 0x84, 0xe7, 0x03, 0xe5, 0x50, 0x3e, 0x24, 0x96, 0xa3, 0x6b, 0x0a, 0x79, + 0xe1, 0xbf, 0x0a, 0xca, 0xa3, 0x1c, 0x6e, 0x37, 0xbc, 0x3f, 0x13, 0xfd, 0x95, 0x62, 0xbb, 0x89, + 0x3f, 0x1d, 0x70, 0xbb, 0x51, 0x49, 0x74, 0xe1, 0x07, 0xc4, 0xd3, 0xd7, 0x14, 0x2e, 0x84, 0x1f, + 0x4b, 0xe8, 0x02, 0xe5, 0xd0, 0xc5, 0xc4, 0xc7, 0x24, 0xfe, 0x44, 0xe1, 0x42, 0xfc, 0x87, 0x87, + 0x2e, 0xa8, 0x24, 0xd6, 0x8e, 0xfa, 0x1a, 0x8e, 0x26, 0x2e, 0xd1, 0xd7, 0x15, 0xb5, 0xcb, 0xff, + 0x7d, 0xc0, 0xda, 0xdd, 0x27, 0x73, 0x58, 0x3b, 0x5c, 0x1d, 0x53, 0xff, 0x4c, 0x51, 0xbb, 0xdc, + 0x2f, 0x1d, 0xac, 0x5d, 0xc8, 0xa7, 0x4e, 0x96, 0xa0, 0x6a, 0x85, 0xe3, 0xc8, 0xf8, 0x2f, 0x40, + 0x3b, 0xa5, 0x2a, 0x47, 0xf6, 0x51, 0x0e, 0xab, 0x5b, 0x4a, 0xac, 0xa6, 0xdd, 0x55, 0xc2, 0xd5, + 0xe3, 0x3c, 0x57, 0xb7, 0xd5, 0x5c, 0xcd, 0xda, 0xb2, 0x04, 0xac, 0xc7, 0x79, 0xb0, 0x6e, 0x3f, + 0xf5, 0x5b, 0x41, 0x24, 0x6b, 0x57, 0x22, 0xeb, 0x86, 0x82, 0xac, 0xa9, 0x0e, 0x43, 0xeb, 0x51, + 0x0e, 0xad, 0x5b, 0x4f, 0x5c, 0x74, 0x05, 0xb6, 0x1e, 0xe7, 0xd9, 0xba, 0xad, 0x66, 0x6b, 0x16, + 0x61, 0x02, 0xd7, 0xa3, 0x1c, 0x5c, 0xb7, 0x94, 0x70, 0xcd, 0x1c, 0x72, 0xba, 0x7e, 0xfd, 0x88, + 0xae, 0x3b, 0x05, 0x74, 0x4d, 0x55, 0x33, 0xbc, 0xf6, 0x64, 0xbc, 0x6e, 0xaa, 0xf0, 0x9a, 0xaa, + 0x71, 0xbe, 0x9e, 0x2a, 0xf9, 0xba, 0x57, 0xc8, 0xd7, 0x54, 0x5f, 0x04, 0xec, 0xa9, 0x12, 0xb0, + 0x7b, 0x85, 0x80, 0xcd, 0x8c, 0x08, 0x84, 0x3d, 0x57, 0x13, 0x76, 0xbf, 0x98, 0xb0, 0xa9, 0x19, + 0x09, 0xb1, 0xe7, 0x6a, 0xc4, 0xee, 0x17, 0x23, 0x56, 0xb2, 0x93, 0x32, 0xf6, 0x3b, 0x05, 0x63, + 0x77, 0x8b, 0x18, 0x9b, 0x9a, 0x10, 0x20, 0xfb, 0xcd, 0x63, 0xc8, 0xee, 0x14, 0x40, 0x36, 0x2b, + 0x66, 0x4a, 0xd9, 0xbe, 0x9a, 0xb2, 0x9f, 0x3f, 0x41, 0xd9, 0xd4, 0x8a, 0x8c, 0xd9, 0xae, 0x84, + 0xd9, 0x0d, 0x05, 0x66, 0xb3, 0xc3, 0x42, 0x39, 0xdb, 0x95, 0x38, 0xbb, 0xa1, 0xe0, 0x6c, 0xa6, + 0x40, 0x41, 0xdb, 0x93, 0x41, 0xbb, 0xa9, 0x02, 0x6d, 0xb6, 0xf1, 0x18, 0x69, 0xbb, 0x12, 0x69, + 0x37, 0x14, 0xa4, 0xcd, 0x9c, 0x50, 0xd4, 0xf6, 0x64, 0xd4, 0x6e, 0xaa, 0x50, 0x9b, 0x39, 0x61, + 0xac, 0xfd, 0x4e, 0xc1, 0xda, 0xdd, 0x22, 0xd6, 0x66, 0x35, 0xcc, 0x60, 0xfb, 0xcd, 0x63, 0xd8, + 0xee, 0x14, 0xc0, 0x36, 0xab, 0x61, 0x4a, 0xdb, 0xb4, 0x8b, 0xbd, 0x5e, 0xa2, 0x17, 0xc5, 0x37, + 0xff, 0x0b, 0x00, 0x00, 0xff, 0xff, 0xd5, 0x22, 0x29, 0x56, 0xfd, 0x1a, 0x00, 0x00, +} diff --git a/pkg/sentry/socket/socket_state_autogen.go b/pkg/sentry/socket/socket_state_autogen.go new file mode 100755 index 000000000..3df219247 --- /dev/null +++ b/pkg/sentry/socket/socket_state_autogen.go @@ -0,0 +1,24 @@ +// automatically generated by stateify. + +package socket + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *SendReceiveTimeout) beforeSave() {} +func (x *SendReceiveTimeout) save(m state.Map) { + x.beforeSave() + m.Save("send", &x.send) + m.Save("recv", &x.recv) +} + +func (x *SendReceiveTimeout) afterLoad() {} +func (x *SendReceiveTimeout) load(m state.Map) { + m.Load("send", &x.send) + m.Load("recv", &x.recv) +} + +func init() { + state.Register("socket.SendReceiveTimeout", (*SendReceiveTimeout)(nil), state.Fns{Save: (*SendReceiveTimeout).save, Load: (*SendReceiveTimeout).load}) +} diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD deleted file mode 100644 index da9977fde..000000000 --- a/pkg/sentry/socket/unix/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "unix", - srcs = [ - "device.go", - "io.go", - "unix.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/unix", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/refs", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sentry/safemem", - "//pkg/sentry/socket", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/epsocket", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD deleted file mode 100644 index 0b0240336..000000000 --- a/pkg/sentry/socket/unix/transport/BUILD +++ /dev/null @@ -1,40 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -go_template_instance( - name = "transport_message_list", - out = "transport_message_list.go", - package = "transport", - prefix = "message", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*message", - "Linker": "*message", - }, -) - -go_library( - name = "transport", - srcs = [ - "connectioned.go", - "connectioned_state.go", - "connectionless.go", - "queue.go", - "transport_message_list.go", - "unix.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport", - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/ilist", - "//pkg/refs", - "//pkg/sentry/context", - "//pkg/syserr", - "//pkg/tcpip", - "//pkg/tcpip/buffer", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/socket/unix/transport/transport_message_list.go b/pkg/sentry/socket/unix/transport/transport_message_list.go new file mode 100755 index 000000000..6d394860c --- /dev/null +++ b/pkg/sentry/socket/unix/transport/transport_message_list.go @@ -0,0 +1,173 @@ +package transport + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type messageElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (messageElementMapper) linkerFor(elem *message) *message { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type messageList struct { + head *message + tail *message +} + +// Reset resets list l to the empty state. +func (l *messageList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *messageList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *messageList) Front() *message { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *messageList) Back() *message { + return l.tail +} + +// PushFront inserts the element e at the front of list l. +func (l *messageList) PushFront(e *message) { + messageElementMapper{}.linkerFor(e).SetNext(l.head) + messageElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + messageElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *messageList) PushBack(e *message) { + messageElementMapper{}.linkerFor(e).SetNext(nil) + messageElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + messageElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *messageList) PushBackList(m *messageList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + messageElementMapper{}.linkerFor(l.tail).SetNext(m.head) + messageElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *messageList) InsertAfter(b, e *message) { + a := messageElementMapper{}.linkerFor(b).Next() + messageElementMapper{}.linkerFor(e).SetNext(a) + messageElementMapper{}.linkerFor(e).SetPrev(b) + messageElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + messageElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *messageList) InsertBefore(a, e *message) { + b := messageElementMapper{}.linkerFor(a).Prev() + messageElementMapper{}.linkerFor(e).SetNext(a) + messageElementMapper{}.linkerFor(e).SetPrev(b) + messageElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + messageElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *messageList) Remove(e *message) { + prev := messageElementMapper{}.linkerFor(e).Prev() + next := messageElementMapper{}.linkerFor(e).Next() + + if prev != nil { + messageElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + messageElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type messageEntry struct { + next *message + prev *message +} + +// Next returns the entry that follows e in the list. +func (e *messageEntry) Next() *message { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *messageEntry) Prev() *message { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *messageEntry) SetNext(elem *message) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *messageEntry) SetPrev(elem *message) { + e.prev = elem +} diff --git a/pkg/sentry/socket/unix/transport/transport_state_autogen.go b/pkg/sentry/socket/unix/transport/transport_state_autogen.go new file mode 100755 index 000000000..83eedb711 --- /dev/null +++ b/pkg/sentry/socket/unix/transport/transport_state_autogen.go @@ -0,0 +1,191 @@ +// automatically generated by stateify. + +package transport + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *connectionedEndpoint) beforeSave() {} +func (x *connectionedEndpoint) save(m state.Map) { + x.beforeSave() + var acceptedChan []*connectionedEndpoint = x.saveAcceptedChan() + m.SaveValue("acceptedChan", acceptedChan) + m.Save("baseEndpoint", &x.baseEndpoint) + m.Save("id", &x.id) + m.Save("idGenerator", &x.idGenerator) + m.Save("stype", &x.stype) +} + +func (x *connectionedEndpoint) afterLoad() {} +func (x *connectionedEndpoint) load(m state.Map) { + m.Load("baseEndpoint", &x.baseEndpoint) + m.Load("id", &x.id) + m.Load("idGenerator", &x.idGenerator) + m.Load("stype", &x.stype) + m.LoadValue("acceptedChan", new([]*connectionedEndpoint), func(y interface{}) { x.loadAcceptedChan(y.([]*connectionedEndpoint)) }) +} + +func (x *connectionlessEndpoint) beforeSave() {} +func (x *connectionlessEndpoint) save(m state.Map) { + x.beforeSave() + m.Save("baseEndpoint", &x.baseEndpoint) +} + +func (x *connectionlessEndpoint) afterLoad() {} +func (x *connectionlessEndpoint) load(m state.Map) { + m.Load("baseEndpoint", &x.baseEndpoint) +} + +func (x *queue) beforeSave() {} +func (x *queue) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("ReaderQueue", &x.ReaderQueue) + m.Save("WriterQueue", &x.WriterQueue) + m.Save("closed", &x.closed) + m.Save("used", &x.used) + m.Save("limit", &x.limit) + m.Save("dataList", &x.dataList) +} + +func (x *queue) afterLoad() {} +func (x *queue) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("ReaderQueue", &x.ReaderQueue) + m.Load("WriterQueue", &x.WriterQueue) + m.Load("closed", &x.closed) + m.Load("used", &x.used) + m.Load("limit", &x.limit) + m.Load("dataList", &x.dataList) +} + +func (x *messageList) beforeSave() {} +func (x *messageList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *messageList) afterLoad() {} +func (x *messageList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *messageEntry) beforeSave() {} +func (x *messageEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *messageEntry) afterLoad() {} +func (x *messageEntry) load(m state.Map) { + m.Load("next", &x.next) + m.Load("prev", &x.prev) +} + +func (x *ControlMessages) beforeSave() {} +func (x *ControlMessages) save(m state.Map) { + x.beforeSave() + m.Save("Rights", &x.Rights) + m.Save("Credentials", &x.Credentials) +} + +func (x *ControlMessages) afterLoad() {} +func (x *ControlMessages) load(m state.Map) { + m.Load("Rights", &x.Rights) + m.Load("Credentials", &x.Credentials) +} + +func (x *message) beforeSave() {} +func (x *message) save(m state.Map) { + x.beforeSave() + m.Save("messageEntry", &x.messageEntry) + m.Save("Data", &x.Data) + m.Save("Control", &x.Control) + m.Save("Address", &x.Address) +} + +func (x *message) afterLoad() {} +func (x *message) load(m state.Map) { + m.Load("messageEntry", &x.messageEntry) + m.Load("Data", &x.Data) + m.Load("Control", &x.Control) + m.Load("Address", &x.Address) +} + +func (x *queueReceiver) beforeSave() {} +func (x *queueReceiver) save(m state.Map) { + x.beforeSave() + m.Save("readQueue", &x.readQueue) +} + +func (x *queueReceiver) afterLoad() {} +func (x *queueReceiver) load(m state.Map) { + m.Load("readQueue", &x.readQueue) +} + +func (x *streamQueueReceiver) beforeSave() {} +func (x *streamQueueReceiver) save(m state.Map) { + x.beforeSave() + m.Save("queueReceiver", &x.queueReceiver) + m.Save("buffer", &x.buffer) + m.Save("control", &x.control) + m.Save("addr", &x.addr) +} + +func (x *streamQueueReceiver) afterLoad() {} +func (x *streamQueueReceiver) load(m state.Map) { + m.Load("queueReceiver", &x.queueReceiver) + m.Load("buffer", &x.buffer) + m.Load("control", &x.control) + m.Load("addr", &x.addr) +} + +func (x *connectedEndpoint) beforeSave() {} +func (x *connectedEndpoint) save(m state.Map) { + x.beforeSave() + m.Save("endpoint", &x.endpoint) + m.Save("writeQueue", &x.writeQueue) +} + +func (x *connectedEndpoint) afterLoad() {} +func (x *connectedEndpoint) load(m state.Map) { + m.Load("endpoint", &x.endpoint) + m.Load("writeQueue", &x.writeQueue) +} + +func (x *baseEndpoint) beforeSave() {} +func (x *baseEndpoint) save(m state.Map) { + x.beforeSave() + m.Save("Queue", &x.Queue) + m.Save("passcred", &x.passcred) + m.Save("receiver", &x.receiver) + m.Save("connected", &x.connected) + m.Save("path", &x.path) +} + +func (x *baseEndpoint) afterLoad() {} +func (x *baseEndpoint) load(m state.Map) { + m.Load("Queue", &x.Queue) + m.Load("passcred", &x.passcred) + m.Load("receiver", &x.receiver) + m.Load("connected", &x.connected) + m.Load("path", &x.path) +} + +func init() { + state.Register("transport.connectionedEndpoint", (*connectionedEndpoint)(nil), state.Fns{Save: (*connectionedEndpoint).save, Load: (*connectionedEndpoint).load}) + state.Register("transport.connectionlessEndpoint", (*connectionlessEndpoint)(nil), state.Fns{Save: (*connectionlessEndpoint).save, Load: (*connectionlessEndpoint).load}) + state.Register("transport.queue", (*queue)(nil), state.Fns{Save: (*queue).save, Load: (*queue).load}) + state.Register("transport.messageList", (*messageList)(nil), state.Fns{Save: (*messageList).save, Load: (*messageList).load}) + state.Register("transport.messageEntry", (*messageEntry)(nil), state.Fns{Save: (*messageEntry).save, Load: (*messageEntry).load}) + state.Register("transport.ControlMessages", (*ControlMessages)(nil), state.Fns{Save: (*ControlMessages).save, Load: (*ControlMessages).load}) + state.Register("transport.message", (*message)(nil), state.Fns{Save: (*message).save, Load: (*message).load}) + state.Register("transport.queueReceiver", (*queueReceiver)(nil), state.Fns{Save: (*queueReceiver).save, Load: (*queueReceiver).load}) + state.Register("transport.streamQueueReceiver", (*streamQueueReceiver)(nil), state.Fns{Save: (*streamQueueReceiver).save, Load: (*streamQueueReceiver).load}) + state.Register("transport.connectedEndpoint", (*connectedEndpoint)(nil), state.Fns{Save: (*connectedEndpoint).save, Load: (*connectedEndpoint).load}) + state.Register("transport.baseEndpoint", (*baseEndpoint)(nil), state.Fns{Save: (*baseEndpoint).save, Load: (*baseEndpoint).load}) +} diff --git a/pkg/sentry/socket/unix/unix_state_autogen.go b/pkg/sentry/socket/unix/unix_state_autogen.go new file mode 100755 index 000000000..86f734156 --- /dev/null +++ b/pkg/sentry/socket/unix/unix_state_autogen.go @@ -0,0 +1,28 @@ +// automatically generated by stateify. + +package unix + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *SocketOperations) beforeSave() {} +func (x *SocketOperations) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("SendReceiveTimeout", &x.SendReceiveTimeout) + m.Save("ep", &x.ep) + m.Save("stype", &x.stype) +} + +func (x *SocketOperations) afterLoad() {} +func (x *SocketOperations) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("SendReceiveTimeout", &x.SendReceiveTimeout) + m.Load("ep", &x.ep) + m.Load("stype", &x.stype) +} + +func init() { + state.Register("unix.SocketOperations", (*SocketOperations)(nil), state.Fns{Save: (*SocketOperations).save, Load: (*SocketOperations).load}) +} diff --git a/pkg/sentry/state/BUILD b/pkg/sentry/state/BUILD deleted file mode 100644 index 88765f4d6..000000000 --- a/pkg/sentry/state/BUILD +++ /dev/null @@ -1,24 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "state", - srcs = [ - "state.go", - "state_metadata.go", - "state_unsafe.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/state", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/log", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/time", - "//pkg/sentry/watchdog", - "//pkg/state/statefile", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/state/state_state_autogen.go b/pkg/sentry/state/state_state_autogen.go new file mode 100755 index 000000000..6c0d9b7a7 --- /dev/null +++ b/pkg/sentry/state/state_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package state + diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD deleted file mode 100644 index 445d25010..000000000 --- a/pkg/sentry/strace/BUILD +++ /dev/null @@ -1,52 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") - -package(licenses = ["notice"]) - -go_library( - name = "strace", - srcs = [ - "capability.go", - "clone.go", - "futex.go", - "linux64.go", - "open.go", - "poll.go", - "ptrace.go", - "signal.go", - "socket.go", - "strace.go", - "syscalls.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/strace", - visibility = ["//:sandbox"], - deps = [ - ":strace_go_proto", - "//pkg/abi", - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/bits", - "//pkg/eventchannel", - "//pkg/seccomp", - "//pkg/sentry/arch", - "//pkg/sentry/kernel", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/epsocket", - "//pkg/sentry/socket/netlink", - "//pkg/sentry/syscalls/linux", - "//pkg/sentry/usermem", - ], -) - -proto_library( - name = "strace_proto", - srcs = ["strace.proto"], - visibility = ["//visibility:public"], -) - -go_proto_library( - name = "strace_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/strace/strace_go_proto", - proto = ":strace_proto", - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/strace/strace.proto b/pkg/sentry/strace/strace.proto deleted file mode 100644 index 4b2f73a5f..000000000 --- a/pkg/sentry/strace/strace.proto +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -message Strace { - // Process name that made the syscall. - string process = 1; - - // Syscall function name. - string function = 2; - - // List of syscall arguments formatted as strings. - repeated string args = 3; - - oneof info { - StraceEnter enter = 4; - StraceExit exit = 5; - } -} - -message StraceEnter { -} - -message StraceExit { - // Return value formatted as string. - string return = 1; - - // Formatted error string in case syscall failed. - string error = 2; - - // Value of errno upon syscall exit. - int64 err_no = 3; // errno is a macro and gets expanded :-( - - // Time elapsed between syscall enter and exit. - int64 elapsed_ns = 4; -} diff --git a/pkg/sentry/strace/strace_go_proto/strace.pb.go b/pkg/sentry/strace/strace_go_proto/strace.pb.go new file mode 100755 index 000000000..ef45661bc --- /dev/null +++ b/pkg/sentry/strace/strace_go_proto/strace.pb.go @@ -0,0 +1,247 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: pkg/sentry/strace/strace.proto + +package gvisor + +import ( + fmt "fmt" + proto "github.com/golang/protobuf/proto" + math "math" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package + +type Strace struct { + Process string `protobuf:"bytes,1,opt,name=process,proto3" json:"process,omitempty"` + Function string `protobuf:"bytes,2,opt,name=function,proto3" json:"function,omitempty"` + Args []string `protobuf:"bytes,3,rep,name=args,proto3" json:"args,omitempty"` + // Types that are valid to be assigned to Info: + // *Strace_Enter + // *Strace_Exit + Info isStrace_Info `protobuf_oneof:"info"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *Strace) Reset() { *m = Strace{} } +func (m *Strace) String() string { return proto.CompactTextString(m) } +func (*Strace) ProtoMessage() {} +func (*Strace) Descriptor() ([]byte, []int) { + return fileDescriptor_50c4b43677c82b5f, []int{0} +} + +func (m *Strace) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_Strace.Unmarshal(m, b) +} +func (m *Strace) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_Strace.Marshal(b, m, deterministic) +} +func (m *Strace) XXX_Merge(src proto.Message) { + xxx_messageInfo_Strace.Merge(m, src) +} +func (m *Strace) XXX_Size() int { + return xxx_messageInfo_Strace.Size(m) +} +func (m *Strace) XXX_DiscardUnknown() { + xxx_messageInfo_Strace.DiscardUnknown(m) +} + +var xxx_messageInfo_Strace proto.InternalMessageInfo + +func (m *Strace) GetProcess() string { + if m != nil { + return m.Process + } + return "" +} + +func (m *Strace) GetFunction() string { + if m != nil { + return m.Function + } + return "" +} + +func (m *Strace) GetArgs() []string { + if m != nil { + return m.Args + } + return nil +} + +type isStrace_Info interface { + isStrace_Info() +} + +type Strace_Enter struct { + Enter *StraceEnter `protobuf:"bytes,4,opt,name=enter,proto3,oneof"` +} + +type Strace_Exit struct { + Exit *StraceExit `protobuf:"bytes,5,opt,name=exit,proto3,oneof"` +} + +func (*Strace_Enter) isStrace_Info() {} + +func (*Strace_Exit) isStrace_Info() {} + +func (m *Strace) GetInfo() isStrace_Info { + if m != nil { + return m.Info + } + return nil +} + +func (m *Strace) GetEnter() *StraceEnter { + if x, ok := m.GetInfo().(*Strace_Enter); ok { + return x.Enter + } + return nil +} + +func (m *Strace) GetExit() *StraceExit { + if x, ok := m.GetInfo().(*Strace_Exit); ok { + return x.Exit + } + return nil +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*Strace) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*Strace_Enter)(nil), + (*Strace_Exit)(nil), + } +} + +type StraceEnter struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *StraceEnter) Reset() { *m = StraceEnter{} } +func (m *StraceEnter) String() string { return proto.CompactTextString(m) } +func (*StraceEnter) ProtoMessage() {} +func (*StraceEnter) Descriptor() ([]byte, []int) { + return fileDescriptor_50c4b43677c82b5f, []int{1} +} + +func (m *StraceEnter) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_StraceEnter.Unmarshal(m, b) +} +func (m *StraceEnter) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_StraceEnter.Marshal(b, m, deterministic) +} +func (m *StraceEnter) XXX_Merge(src proto.Message) { + xxx_messageInfo_StraceEnter.Merge(m, src) +} +func (m *StraceEnter) XXX_Size() int { + return xxx_messageInfo_StraceEnter.Size(m) +} +func (m *StraceEnter) XXX_DiscardUnknown() { + xxx_messageInfo_StraceEnter.DiscardUnknown(m) +} + +var xxx_messageInfo_StraceEnter proto.InternalMessageInfo + +type StraceExit struct { + Return string `protobuf:"bytes,1,opt,name=return,proto3" json:"return,omitempty"` + Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` + ErrNo int64 `protobuf:"varint,3,opt,name=err_no,json=errNo,proto3" json:"err_no,omitempty"` + ElapsedNs int64 `protobuf:"varint,4,opt,name=elapsed_ns,json=elapsedNs,proto3" json:"elapsed_ns,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *StraceExit) Reset() { *m = StraceExit{} } +func (m *StraceExit) String() string { return proto.CompactTextString(m) } +func (*StraceExit) ProtoMessage() {} +func (*StraceExit) Descriptor() ([]byte, []int) { + return fileDescriptor_50c4b43677c82b5f, []int{2} +} + +func (m *StraceExit) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_StraceExit.Unmarshal(m, b) +} +func (m *StraceExit) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_StraceExit.Marshal(b, m, deterministic) +} +func (m *StraceExit) XXX_Merge(src proto.Message) { + xxx_messageInfo_StraceExit.Merge(m, src) +} +func (m *StraceExit) XXX_Size() int { + return xxx_messageInfo_StraceExit.Size(m) +} +func (m *StraceExit) XXX_DiscardUnknown() { + xxx_messageInfo_StraceExit.DiscardUnknown(m) +} + +var xxx_messageInfo_StraceExit proto.InternalMessageInfo + +func (m *StraceExit) GetReturn() string { + if m != nil { + return m.Return + } + return "" +} + +func (m *StraceExit) GetError() string { + if m != nil { + return m.Error + } + return "" +} + +func (m *StraceExit) GetErrNo() int64 { + if m != nil { + return m.ErrNo + } + return 0 +} + +func (m *StraceExit) GetElapsedNs() int64 { + if m != nil { + return m.ElapsedNs + } + return 0 +} + +func init() { + proto.RegisterType((*Strace)(nil), "gvisor.Strace") + proto.RegisterType((*StraceEnter)(nil), "gvisor.StraceEnter") + proto.RegisterType((*StraceExit)(nil), "gvisor.StraceExit") +} + +func init() { proto.RegisterFile("pkg/sentry/strace/strace.proto", fileDescriptor_50c4b43677c82b5f) } + +var fileDescriptor_50c4b43677c82b5f = []byte{ + // 255 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x5c, 0x90, 0xdd, 0x4a, 0xf4, 0x30, + 0x10, 0x86, 0xb7, 0x5f, 0xdb, 0x7c, 0x76, 0x16, 0x4f, 0xc6, 0x1f, 0x82, 0xa0, 0x94, 0x1e, 0x05, + 0x84, 0x2e, 0xe8, 0x1d, 0x08, 0xc2, 0x1e, 0xed, 0x41, 0xbc, 0x80, 0xa5, 0xd6, 0xd9, 0x12, 0x94, + 0x24, 0x4c, 0xb2, 0xb2, 0x5e, 0x96, 0x77, 0x28, 0xa6, 0xf1, 0x07, 0x8f, 0x92, 0x67, 0xde, 0x87, + 0x0c, 0x6f, 0xe0, 0xca, 0x3f, 0x4f, 0xab, 0x40, 0x36, 0xf2, 0xdb, 0x2a, 0x44, 0x1e, 0x46, 0xca, + 0x47, 0xef, 0xd9, 0x45, 0x87, 0x62, 0x7a, 0x35, 0xc1, 0x71, 0xf7, 0x5e, 0x80, 0x78, 0x48, 0x01, + 0x4a, 0xf8, 0xef, 0xd9, 0x8d, 0x14, 0x82, 0x2c, 0xda, 0x42, 0x35, 0xfa, 0x0b, 0xf1, 0x02, 0x8e, + 0x76, 0x7b, 0x3b, 0x46, 0xe3, 0xac, 0xfc, 0x97, 0xa2, 0x6f, 0x46, 0x84, 0x6a, 0xe0, 0x29, 0xc8, + 0xb2, 0x2d, 0x55, 0xa3, 0xd3, 0x1d, 0xaf, 0xa1, 0x26, 0x1b, 0x89, 0x65, 0xd5, 0x16, 0x6a, 0x79, + 0x73, 0xd2, 0xcf, 0xcb, 0xfa, 0x79, 0xd1, 0xfd, 0x67, 0xb4, 0x5e, 0xe8, 0xd9, 0x41, 0x05, 0x15, + 0x1d, 0x4c, 0x94, 0x75, 0x72, 0xf1, 0x8f, 0x7b, 0x30, 0x71, 0xbd, 0xd0, 0xc9, 0xb8, 0x13, 0x50, + 0x19, 0xbb, 0x73, 0xdd, 0x31, 0x2c, 0x7f, 0xbd, 0xd4, 0x79, 0x80, 0x1f, 0x19, 0xcf, 0x41, 0x30, + 0xc5, 0x3d, 0xdb, 0x5c, 0x22, 0x13, 0x9e, 0x42, 0x4d, 0xcc, 0x8e, 0x73, 0x81, 0x19, 0xf0, 0x0c, + 0x04, 0x31, 0x6f, 0xad, 0x93, 0x65, 0x5b, 0xa8, 0x32, 0x8d, 0x37, 0x0e, 0x2f, 0x01, 0xe8, 0x65, + 0xf0, 0x81, 0x9e, 0xb6, 0x36, 0xa4, 0x16, 0xa5, 0x6e, 0xf2, 0x64, 0x13, 0x1e, 0x45, 0xfa, 0xc3, + 0xdb, 0x8f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x42, 0x9a, 0xbc, 0x81, 0x65, 0x01, 0x00, 0x00, +} diff --git a/pkg/sentry/strace/strace_state_autogen.go b/pkg/sentry/strace/strace_state_autogen.go new file mode 100755 index 000000000..9dc697ed6 --- /dev/null +++ b/pkg/sentry/strace/strace_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package strace + diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD deleted file mode 100644 index 79d972202..000000000 --- a/pkg/sentry/syscalls/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "syscalls", - srcs = [ - "epoll.go", - "syscalls.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls", - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/arch", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/epoll", - "//pkg/sentry/kernel/time", - "//pkg/syserror", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD deleted file mode 100644 index 33a40b9c6..000000000 --- a/pkg/sentry/syscalls/linux/BUILD +++ /dev/null @@ -1,92 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "linux", - srcs = [ - "error.go", - "flags.go", - "linux64.go", - "sigset.go", - "sys_aio.go", - "sys_capability.go", - "sys_epoll.go", - "sys_eventfd.go", - "sys_file.go", - "sys_futex.go", - "sys_getdents.go", - "sys_identity.go", - "sys_inotify.go", - "sys_lseek.go", - "sys_mempolicy.go", - "sys_mmap.go", - "sys_mount.go", - "sys_pipe.go", - "sys_poll.go", - "sys_prctl.go", - "sys_random.go", - "sys_read.go", - "sys_rlimit.go", - "sys_rusage.go", - "sys_sched.go", - "sys_seccomp.go", - "sys_sem.go", - "sys_shm.go", - "sys_signal.go", - "sys_socket.go", - "sys_splice.go", - "sys_stat.go", - "sys_sync.go", - "sys_sysinfo.go", - "sys_syslog.go", - "sys_thread.go", - "sys_time.go", - "sys_timer.go", - "sys_timerfd.go", - "sys_tls.go", - "sys_utsname.go", - "sys_write.go", - "timespec.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls/linux", - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi", - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/bpf", - "//pkg/log", - "//pkg/metric", - "//pkg/rand", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/lock", - "//pkg/sentry/fs/timerfd", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/epoll", - "//pkg/sentry/kernel/eventfd", - "//pkg/sentry/kernel/fasync", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/sched", - "//pkg/sentry/kernel/shm", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/safemem", - "//pkg/sentry/socket", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/syscalls", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/syscalls/linux/linux_state_autogen.go b/pkg/sentry/syscalls/linux/linux_state_autogen.go new file mode 100755 index 000000000..06d1fb23f --- /dev/null +++ b/pkg/sentry/syscalls/linux/linux_state_autogen.go @@ -0,0 +1,80 @@ +// automatically generated by stateify. + +package linux + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *ioEvent) beforeSave() {} +func (x *ioEvent) save(m state.Map) { + x.beforeSave() + m.Save("Data", &x.Data) + m.Save("Obj", &x.Obj) + m.Save("Result", &x.Result) + m.Save("Result2", &x.Result2) +} + +func (x *ioEvent) afterLoad() {} +func (x *ioEvent) load(m state.Map) { + m.Load("Data", &x.Data) + m.Load("Obj", &x.Obj) + m.Load("Result", &x.Result) + m.Load("Result2", &x.Result2) +} + +func (x *futexWaitRestartBlock) beforeSave() {} +func (x *futexWaitRestartBlock) save(m state.Map) { + x.beforeSave() + m.Save("duration", &x.duration) + m.Save("addr", &x.addr) + m.Save("private", &x.private) + m.Save("val", &x.val) + m.Save("mask", &x.mask) +} + +func (x *futexWaitRestartBlock) afterLoad() {} +func (x *futexWaitRestartBlock) load(m state.Map) { + m.Load("duration", &x.duration) + m.Load("addr", &x.addr) + m.Load("private", &x.private) + m.Load("val", &x.val) + m.Load("mask", &x.mask) +} + +func (x *pollRestartBlock) beforeSave() {} +func (x *pollRestartBlock) save(m state.Map) { + x.beforeSave() + m.Save("pfdAddr", &x.pfdAddr) + m.Save("nfds", &x.nfds) + m.Save("timeout", &x.timeout) +} + +func (x *pollRestartBlock) afterLoad() {} +func (x *pollRestartBlock) load(m state.Map) { + m.Load("pfdAddr", &x.pfdAddr) + m.Load("nfds", &x.nfds) + m.Load("timeout", &x.timeout) +} + +func (x *clockNanosleepRestartBlock) beforeSave() {} +func (x *clockNanosleepRestartBlock) save(m state.Map) { + x.beforeSave() + m.Save("c", &x.c) + m.Save("duration", &x.duration) + m.Save("rem", &x.rem) +} + +func (x *clockNanosleepRestartBlock) afterLoad() {} +func (x *clockNanosleepRestartBlock) load(m state.Map) { + m.Load("c", &x.c) + m.Load("duration", &x.duration) + m.Load("rem", &x.rem) +} + +func init() { + state.Register("linux.ioEvent", (*ioEvent)(nil), state.Fns{Save: (*ioEvent).save, Load: (*ioEvent).load}) + state.Register("linux.futexWaitRestartBlock", (*futexWaitRestartBlock)(nil), state.Fns{Save: (*futexWaitRestartBlock).save, Load: (*futexWaitRestartBlock).load}) + state.Register("linux.pollRestartBlock", (*pollRestartBlock)(nil), state.Fns{Save: (*pollRestartBlock).save, Load: (*pollRestartBlock).load}) + state.Register("linux.clockNanosleepRestartBlock", (*clockNanosleepRestartBlock)(nil), state.Fns{Save: (*clockNanosleepRestartBlock).save, Load: (*clockNanosleepRestartBlock).load}) +} diff --git a/pkg/sentry/syscalls/syscalls_state_autogen.go b/pkg/sentry/syscalls/syscalls_state_autogen.go new file mode 100755 index 000000000..c114e7989 --- /dev/null +++ b/pkg/sentry/syscalls/syscalls_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package syscalls + diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD deleted file mode 100644 index 8aa6a3017..000000000 --- a/pkg/sentry/time/BUILD +++ /dev/null @@ -1,52 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") - -go_template_instance( - name = "seqatomic_parameters", - out = "seqatomic_parameters_unsafe.go", - package = "time", - suffix = "Parameters", - template = "//third_party/gvsync:generic_seqatomic", - types = { - "Value": "Parameters", - }, -) - -go_library( - name = "time", - srcs = [ - "arith_arm64.go", - "calibrated_clock.go", - "clock_id.go", - "clocks.go", - "muldiv_amd64.s", - "muldiv_arm64.s", - "parameters.go", - "sampler.go", - "sampler_unsafe.go", - "seqatomic_parameters_unsafe.go", - "tsc_amd64.s", - "tsc_arm64.s", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/time", - visibility = ["//:sandbox"], - deps = [ - "//pkg/log", - "//pkg/metric", - "//pkg/syserror", - "//third_party/gvsync", - ], -) - -go_test( - name = "time_test", - srcs = [ - "calibrated_clock_test.go", - "parameters_test.go", - "sampler_test.go", - ], - embed = [":time"], -) diff --git a/pkg/sentry/time/LICENSE b/pkg/sentry/time/LICENSE deleted file mode 100644 index 6a66aea5e..000000000 --- a/pkg/sentry/time/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pkg/sentry/time/calibrated_clock_test.go b/pkg/sentry/time/calibrated_clock_test.go deleted file mode 100644 index d6622bfe2..000000000 --- a/pkg/sentry/time/calibrated_clock_test.go +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package time - -import ( - "testing" - "time" -) - -// newTestCalibratedClock returns a CalibratedClock that collects samples from -// the given sample list and cycle counts from the given cycle list. -func newTestCalibratedClock(samples []sample, cycles []TSCValue) *CalibratedClock { - return &CalibratedClock{ - ref: newTestSampler(samples, cycles), - } -} - -func TestConstantFrequency(t *testing.T) { - // Perfectly constant frequency. - samples := []sample{ - {before: 100000, after: 100000 + defaultOverheadCycles, ref: 100}, - {before: 200000, after: 200000 + defaultOverheadCycles, ref: 200}, - {before: 300000, after: 300000 + defaultOverheadCycles, ref: 300}, - {before: 400000, after: 400000 + defaultOverheadCycles, ref: 400}, - {before: 500000, after: 500000 + defaultOverheadCycles, ref: 500}, - {before: 600000, after: 600000 + defaultOverheadCycles, ref: 600}, - {before: 700000, after: 700000 + defaultOverheadCycles, ref: 700}, - } - - c := newTestCalibratedClock(samples, nil) - - // Update from all samples. - for range samples { - c.Update() - } - - c.mu.RLock() - if !c.ready { - c.mu.RUnlock() - t.Fatalf("clock not ready") - } - // A bit after the last sample. - now, ok := c.params.ComputeTime(750000) - c.mu.RUnlock() - if !ok { - t.Fatalf("ComputeTime ok got %v want true", ok) - } - - t.Logf("now: %v", now) - - // Time should be between the current sample and where we'd expect the - // next sample. - if now < 700 || now > 800 { - t.Errorf("now got %v want > 700 && < 800", now) - } -} - -func TestErrorCorrection(t *testing.T) { - testCases := []struct { - name string - samples [5]sample - projectedTimeStart int64 - projectedTimeEnd int64 - }{ - // Initial calibration should be ~1MHz for each of these, and - // the reference clock changes in samples[2]. - { - name: "slow-down", - samples: [5]sample{ - {before: 1000000, after: 1000001, ref: ReferenceNS(1 * ApproxUpdateInterval.Nanoseconds())}, - {before: 2000000, after: 2000001, ref: ReferenceNS(2 * ApproxUpdateInterval.Nanoseconds())}, - // Reference clock has slowed down, causing 100ms of error. - {before: 3010000, after: 3010001, ref: ReferenceNS(3 * ApproxUpdateInterval.Nanoseconds())}, - {before: 4020000, after: 4020001, ref: ReferenceNS(4 * ApproxUpdateInterval.Nanoseconds())}, - {before: 5030000, after: 5030001, ref: ReferenceNS(5 * ApproxUpdateInterval.Nanoseconds())}, - }, - projectedTimeStart: 3005 * time.Millisecond.Nanoseconds(), - projectedTimeEnd: 3015 * time.Millisecond.Nanoseconds(), - }, - { - name: "speed-up", - samples: [5]sample{ - {before: 1000000, after: 1000001, ref: ReferenceNS(1 * ApproxUpdateInterval.Nanoseconds())}, - {before: 2000000, after: 2000001, ref: ReferenceNS(2 * ApproxUpdateInterval.Nanoseconds())}, - // Reference clock has sped up, causing 100ms of error. - {before: 2990000, after: 2990001, ref: ReferenceNS(3 * ApproxUpdateInterval.Nanoseconds())}, - {before: 3980000, after: 3980001, ref: ReferenceNS(4 * ApproxUpdateInterval.Nanoseconds())}, - {before: 4970000, after: 4970001, ref: ReferenceNS(5 * ApproxUpdateInterval.Nanoseconds())}, - }, - projectedTimeStart: 2985 * time.Millisecond.Nanoseconds(), - projectedTimeEnd: 2995 * time.Millisecond.Nanoseconds(), - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - c := newTestCalibratedClock(tc.samples[:], nil) - - // Initial calibration takes two updates. - _, ok := c.Update() - if ok { - t.Fatalf("Update ready too early") - } - - params, ok := c.Update() - if !ok { - t.Fatalf("Update not ready") - } - - // Initial calibration is ~1MHz. - hz := params.Frequency - if hz < 990000 || hz > 1010000 { - t.Fatalf("Frequency got %v want > 990kHz && < 1010kHz", hz) - } - - // Project time at the next update. Given the 1MHz - // calibration, it is expected to be ~3.1s/2.9s, not - // the actual 3s. - // - // N.B. the next update time is the "after" time above. - projected, ok := params.ComputeTime(tc.samples[2].after) - if !ok { - t.Fatalf("ComputeTime ok got %v want true", ok) - } - if projected < tc.projectedTimeStart || projected > tc.projectedTimeEnd { - t.Fatalf("ComputeTime(%v) got %v want > %v && < %v", tc.samples[2].after, projected, tc.projectedTimeStart, tc.projectedTimeEnd) - } - - // Update again to see the changed reference clock. - params, ok = c.Update() - if !ok { - t.Fatalf("Update not ready") - } - - // We now know that TSC = tc.samples[2].after -> 3s, - // but with the previous params indicated that TSC - // tc.samples[2].after -> 3.5s/2.5s. We can't allow the - // clock to go backwards, and having the clock jump - // forwards is undesirable. There should be a smooth - // transition that corrects the clock error over time. - // Check that the clock is continuous at TSC = - // tc.samples[2].after. - newProjected, ok := params.ComputeTime(tc.samples[2].after) - if !ok { - t.Fatalf("ComputeTime ok got %v want true", ok) - } - if newProjected != projected { - t.Errorf("Discontinuous time; ComputeTime(%v) got %v want %v", tc.samples[2].after, newProjected, projected) - } - - // As the reference clock stablizes, ensure that the clock error - // decreases. - initialErr := c.errorNS - t.Logf("initial error: %v ns", initialErr) - - _, ok = c.Update() - if !ok { - t.Fatalf("Update not ready") - } - if c.errorNS.Magnitude() > initialErr.Magnitude() { - t.Errorf("errorNS increased, got %v want |%v| <= |%v|", c.errorNS, c.errorNS, initialErr) - } - - _, ok = c.Update() - if !ok { - t.Fatalf("Update not ready") - } - if c.errorNS.Magnitude() > initialErr.Magnitude() { - t.Errorf("errorNS increased, got %v want |%v| <= |%v|", c.errorNS, c.errorNS, initialErr) - } - - t.Logf("final error: %v ns", c.errorNS) - }) - } -} diff --git a/pkg/sentry/time/parameters_test.go b/pkg/sentry/time/parameters_test.go deleted file mode 100644 index e1b9084ac..000000000 --- a/pkg/sentry/time/parameters_test.go +++ /dev/null @@ -1,486 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package time - -import ( - "math" - "testing" - "time" -) - -func TestParametersComputeTime(t *testing.T) { - testCases := []struct { - name string - params Parameters - now TSCValue - want int64 - }{ - { - // Now is the same as the base cycles. - name: "base-cycles", - params: Parameters{ - BaseCycles: 10000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 10000, - want: 5000 * time.Millisecond.Nanoseconds(), - }, - { - // Now is the behind the base cycles. Time is frozen. - name: "backwards", - params: Parameters{ - BaseCycles: 10000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 9000, - want: 5000 * time.Millisecond.Nanoseconds(), - }, - { - // Now is ahead of the base cycles. - name: "ahead", - params: Parameters{ - BaseCycles: 10000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 15000, - want: 5500 * time.Millisecond.Nanoseconds(), - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - got, ok := tc.params.ComputeTime(tc.now) - if !ok { - t.Errorf("ComputeTime ok got %v want true", got) - } - if got != tc.want { - t.Errorf("ComputeTime got %+v want %+v", got, tc.want) - } - }) - } -} - -func TestParametersErrorAdjust(t *testing.T) { - testCases := []struct { - name string - oldParams Parameters - now TSCValue - newParams Parameters - want Parameters - errorNS ReferenceNS - wantErr bool - }{ - { - // newParams are perfectly continuous with oldParams - // and don't need adjustment. - name: "continuous", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 50000, - newParams: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - want: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - }, - { - // Same as "continuous", but with now ahead of - // newParams.BaseCycles. The result is the same as - // there is no error to correct. - name: "continuous-nowdiff", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 60000, - newParams: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - want: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - }, - { - // errorAdjust bails out if the TSC goes backwards. - name: "tsc-backwards", - oldParams: Parameters{ - BaseCycles: 10000, - BaseRef: ReferenceNS(1000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 9000, - newParams: Parameters{ - BaseCycles: 9000, - BaseRef: ReferenceNS(1100 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - wantErr: true, - }, - { - // errorAdjust bails out if new params are from after now. - name: "params-after-now", - oldParams: Parameters{ - BaseCycles: 10000, - BaseRef: ReferenceNS(1000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 11000, - newParams: Parameters{ - BaseCycles: 12000, - BaseRef: ReferenceNS(1200 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - wantErr: true, - }, - { - // Host clock sped up. - name: "speed-up", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 45000, - // Host frequency changed to 9000 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 45000, - // From oldParams, we think ref = 4.5s at cycles = 45000. - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 9000, - }, - want: Parameters{ - BaseCycles: 45000, - BaseRef: ReferenceNS(4500 * time.Millisecond.Nanoseconds()), - // We must decrease the new frequency by 50% to - // correct 0.5s of error in 1s - // (ApproxUpdateInterval). - Frequency: 4500, - }, - errorNS: ReferenceNS(-500 * time.Millisecond.Nanoseconds()), - }, - { - // Host clock sped up, with now ahead of newParams. - name: "speed-up-nowdiff", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 50000, - // Host frequency changed to 9000 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 45000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 9000, - }, - // nextRef = 6000ms - // nextCycles = 9000 * (6000ms - 5000ms) + 45000 - // nextCycles = 9000 * (1s) + 45000 - // nextCycles = 54000 - // f = (54000 - 50000) / 1s = 4000 - // - // ref = 5000ms - (50000 - 45000) / 4000 - // ref = 3.75s - want: Parameters{ - BaseCycles: 45000, - BaseRef: ReferenceNS(3750 * time.Millisecond.Nanoseconds()), - Frequency: 4000, - }, - // oldNow = 50000 * 10000 = 5s - // newNow = (50000 - 45000) / 9000 + 5s = 5.555s - errorNS: ReferenceNS((5000*time.Millisecond - 5555555555).Nanoseconds()), - }, - { - // Host clock sped up. The new parameters are so far - // ahead that the next update time already passed. - name: "speed-up-uncorrectable-baseref", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 50000, - // Host frequency changed to 5000 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 45000, - BaseRef: ReferenceNS(9000 * time.Millisecond.Nanoseconds()), - Frequency: 5000, - }, - // The next update should be at 10s, but newParams - // already passed 6s. Thus it is impossible to correct - // the clock by then. - wantErr: true, - }, - { - // Host clock sped up. The new parameters are moving so - // fast that the next update should be before now. - name: "speed-up-uncorrectable-frequency", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 55000, - // Host frequency changed to 7500 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 45000, - BaseRef: ReferenceNS(6000 * time.Millisecond.Nanoseconds()), - Frequency: 7500, - }, - // The next update should be at 6.5s, but newParams are - // so far ahead and fast that they reach 6.5s at cycle - // 48750, which before now! Thus it is impossible to - // correct the clock by then. - wantErr: true, - }, - { - // Host clock slowed down. - name: "slow-down", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 55000, - // Host frequency changed to 11000 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 55000, - // From oldParams, we think ref = 5.5s at cycles = 55000. - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 11000, - }, - want: Parameters{ - BaseCycles: 55000, - BaseRef: ReferenceNS(5500 * time.Millisecond.Nanoseconds()), - // We must increase the new frequency by 50% to - // correct 0.5s of error in 1s - // (ApproxUpdateInterval). - Frequency: 16500, - }, - errorNS: ReferenceNS(500 * time.Millisecond.Nanoseconds()), - }, - { - // Host clock slowed down, with now ahead of newParams. - name: "slow-down-nowdiff", - oldParams: Parameters{ - BaseCycles: 0, - BaseRef: 0, - Frequency: 10000, - }, - now: 60000, - // Host frequency changed to 11000 immediately after - // oldParams was returned. - newParams: Parameters{ - BaseCycles: 55000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 11000, - }, - // nextRef = 7000ms - // nextCycles = 11000 * (7000ms - 5000ms) + 55000 - // nextCycles = 11000 * (2000ms) + 55000 - // nextCycles = 77000 - // f = (77000 - 60000) / 1s = 17000 - // - // ref = 6000ms - (60000 - 55000) / 17000 - // ref = 5705882353ns - want: Parameters{ - BaseCycles: 55000, - BaseRef: ReferenceNS(5705882353), - Frequency: 17000, - }, - // oldNow = 60000 * 10000 = 6s - // newNow = (60000 - 55000) / 11000 + 5s = 5.4545s - errorNS: ReferenceNS((6*time.Second - 5454545454).Nanoseconds()), - }, - { - // Host time went backwards. - name: "time-backwards", - oldParams: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 60000, - newParams: Parameters{ - BaseCycles: 60000, - // From oldParams, we think ref = 6s at cycles = 60000. - BaseRef: ReferenceNS(4000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - want: Parameters{ - BaseCycles: 60000, - BaseRef: ReferenceNS(6000 * time.Millisecond.Nanoseconds()), - // We must increase the frequency by 200% to - // correct 2s of error in 1s - // (ApproxUpdateInterval). - Frequency: 30000, - }, - errorNS: ReferenceNS(2000 * time.Millisecond.Nanoseconds()), - }, - { - // Host time went backwards, with now ahead of newParams. - name: "time-backwards-nowdiff", - oldParams: Parameters{ - BaseCycles: 50000, - BaseRef: ReferenceNS(5000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - now: 65000, - // nextRef = 7500ms - // nextCycles = 10000 * (7500ms - 4000ms) + 60000 - // nextCycles = 10000 * (3500ms) + 60000 - // nextCycles = 95000 - // f = (95000 - 65000) / 1s = 30000 - // - // ref = 6500ms - (65000 - 60000) / 30000 - // ref = 6333333333ns - newParams: Parameters{ - BaseCycles: 60000, - BaseRef: ReferenceNS(4000 * time.Millisecond.Nanoseconds()), - Frequency: 10000, - }, - want: Parameters{ - BaseCycles: 60000, - BaseRef: ReferenceNS(6333333334), - Frequency: 30000, - }, - // oldNow = 65000 * 10000 = 6.5s - // newNow = (65000 - 60000) / 10000 + 4s = 4.5s - errorNS: ReferenceNS(2000 * time.Millisecond.Nanoseconds()), - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - got, errorNS, err := errorAdjust(tc.oldParams, tc.newParams, tc.now) - if err != nil && !tc.wantErr { - t.Errorf("err got %v want nil", err) - } else if err == nil && tc.wantErr { - t.Errorf("err got nil want non-nil") - } - - if got != tc.want { - t.Errorf("Parameters got %+v want %+v", got, tc.want) - } - if errorNS != tc.errorNS { - t.Errorf("errorNS got %v want %v", errorNS, tc.errorNS) - } - }) - } -} - -func testMuldiv(t *testing.T, v uint64) { - for i := uint64(1); i <= 1000000; i++ { - mult := uint64(1000000000) - div := i * mult - res, ok := muldiv64(v, mult, div) - if !ok { - t.Errorf("Result of %v * %v / %v ok got false want true", v, mult, div) - } - if want := v / i; res != want { - t.Errorf("Bad result of %v * %v / %v: got %v, want %v", v, mult, div, res, want) - } - } -} - -func TestMulDiv(t *testing.T) { - testMuldiv(t, math.MaxUint64) - for i := int64(-10); i <= 10; i++ { - testMuldiv(t, uint64(i)) - } -} - -func TestMulDivZero(t *testing.T) { - if r, ok := muldiv64(2, 4, 0); ok { - t.Errorf("muldiv64(2, 4, 0) got %d, ok want !ok", r) - } - - if r, ok := muldiv64(0, 0, 0); ok { - t.Errorf("muldiv64(0, 0, 0) got %d, ok want !ok", r) - } -} - -func TestMulDivOverflow(t *testing.T) { - testCases := []struct { - name string - val uint64 - mult uint64 - div uint64 - ok bool - ret uint64 - }{ - { - name: "2^62", - val: 1 << 63, - mult: 4, - div: 8, - ok: true, - ret: 1 << 62, - }, - { - name: "2^64-1", - val: 0xffffffffffffffff, - mult: 1, - div: 1, - ok: true, - ret: 0xffffffffffffffff, - }, - { - name: "2^64", - val: 1 << 63, - mult: 4, - div: 2, - ok: false, - }, - { - name: "2^125", - val: 1 << 63, - mult: 1 << 63, - div: 2, - ok: false, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - r, ok := muldiv64(tc.val, tc.mult, tc.div) - if ok != tc.ok { - t.Errorf("ok got %v want %v", ok, tc.ok) - } - if tc.ok && r != tc.ret { - t.Errorf("ret got %v want %v", r, tc.ret) - } - }) - } -} diff --git a/pkg/sentry/time/sampler_test.go b/pkg/sentry/time/sampler_test.go deleted file mode 100644 index 3e70a1134..000000000 --- a/pkg/sentry/time/sampler_test.go +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package time - -import ( - "errors" - "testing" -) - -// errNoSamples is returned when testReferenceClocks runs out of samples. -var errNoSamples = errors.New("no samples available") - -// testReferenceClocks returns a preset list of samples and cycle counts. -type testReferenceClocks struct { - samples []sample - cycles []TSCValue -} - -// Sample implements referenceClocks.Sample, returning the next sample in the list. -func (t *testReferenceClocks) Sample(_ ClockID) (sample, error) { - if len(t.samples) == 0 { - return sample{}, errNoSamples - } - - s := t.samples[0] - if len(t.samples) == 1 { - t.samples = nil - } else { - t.samples = t.samples[1:] - } - - return s, nil -} - -// Cycles implements referenceClocks.Cycles, returning the next TSCValue in the list. -func (t *testReferenceClocks) Cycles() TSCValue { - if len(t.cycles) == 0 { - return 0 - } - - c := t.cycles[0] - if len(t.cycles) == 1 { - t.cycles = nil - } else { - t.cycles = t.cycles[1:] - } - - return c -} - -// newTestSampler returns a sampler that collects samples from -// the given sample list and cycle counts from the given cycle list. -func newTestSampler(samples []sample, cycles []TSCValue) *sampler { - return &sampler{ - clocks: &testReferenceClocks{ - samples: samples, - cycles: cycles, - }, - overhead: defaultOverheadCycles, - } -} - -// generateSamples generates n samples with the given overhead. -func generateSamples(n int, overhead TSCValue) []sample { - samples := []sample{{before: 1000000, after: 1000000 + overhead, ref: 100}} - for i := 0; i < n-1; i++ { - prev := samples[len(samples)-1] - samples = append(samples, sample{ - before: prev.before + 1000000, - after: prev.after + 1000000, - ref: prev.ref + 100, - }) - } - return samples -} - -// TestSample ensures that samples can be collected. -func TestSample(t *testing.T) { - testCases := []struct { - name string - samples []sample - err error - }{ - { - name: "basic", - samples: []sample{ - {before: 100000, after: 100000 + defaultOverheadCycles, ref: 100}, - }, - err: nil, - }, - { - // Sample with backwards TSC ignored. - // referenceClock should retry and get errNoSamples. - name: "backwards-tsc-ignored", - samples: []sample{ - {before: 100000, after: 90000, ref: 100}, - }, - err: errNoSamples, - }, - { - // Sample far above overhead skipped. - // referenceClock should retry and get errNoSamples. - name: "reject-overhead", - samples: []sample{ - {before: 100000, after: 100000 + 5*defaultOverheadCycles, ref: 100}, - }, - err: errNoSamples, - }, - { - // Maximum overhead allowed is bounded. - name: "over-max-overhead", - // Generate a bunch of samples. The reference clock - // needs a while to ramp up its expected overhead. - samples: generateSamples(100, 2*maxOverheadCycles), - err: errOverheadTooHigh, - }, - { - // Overhead at maximum overhead is allowed. - name: "max-overhead", - // Generate a bunch of samples. The reference clock - // needs a while to ramp up its expected overhead. - samples: generateSamples(100, maxOverheadCycles), - err: nil, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - s := newTestSampler(tc.samples, nil) - err := s.Sample() - if err != tc.err { - t.Errorf("Sample err got %v want %v", err, tc.err) - } - }) - } -} - -// TestOutliersIgnored tests that referenceClock ignores samples with very high -// overhead. -func TestOutliersIgnored(t *testing.T) { - s := newTestSampler([]sample{ - {before: 100000, after: 100000 + defaultOverheadCycles, ref: 100}, - {before: 200000, after: 200000 + defaultOverheadCycles, ref: 200}, - {before: 300000, after: 300000 + defaultOverheadCycles, ref: 300}, - {before: 400000, after: 400000 + defaultOverheadCycles, ref: 400}, - {before: 500000, after: 500000 + 5*defaultOverheadCycles, ref: 500}, // Ignored - {before: 600000, after: 600000 + defaultOverheadCycles, ref: 600}, - {before: 700000, after: 700000 + defaultOverheadCycles, ref: 700}, - }, nil) - - // Collect 5 samples. - for i := 0; i < 5; i++ { - err := s.Sample() - if err != nil { - t.Fatalf("Unexpected error while sampling: %v", err) - } - } - - oldest, newest, ok := s.Range() - if !ok { - t.Fatalf("Range not ok") - } - - if oldest.ref != 100 { - t.Errorf("oldest.ref got %v want %v", oldest.ref, 100) - } - - // We skipped the high-overhead sample. - if newest.ref != 600 { - t.Errorf("newest.ref got %v want %v", newest.ref, 600) - } -} diff --git a/pkg/sentry/time/seqatomic_parameters_unsafe.go b/pkg/sentry/time/seqatomic_parameters_unsafe.go new file mode 100755 index 000000000..89792c56d --- /dev/null +++ b/pkg/sentry/time/seqatomic_parameters_unsafe.go @@ -0,0 +1,55 @@ +package time + +import ( + "fmt" + "reflect" + "strings" + "unsafe" + + "gvisor.dev/gvisor/third_party/gvsync" +) + +// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race +// with any writer critical sections in sc. +func SeqAtomicLoadParameters(sc *gvsync.SeqCount, ptr *Parameters) Parameters { + // This function doesn't use SeqAtomicTryLoad because doing so is + // measurably, significantly (~20%) slower; Go is awful at inlining. + var val Parameters + for { + epoch := sc.BeginRead() + if gvsync.RaceEnabled { + + gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + + val = *ptr + } + if sc.ReadOk(epoch) { + break + } + } + return val +} + +// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section +// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read +// would race with a writer critical section, SeqAtomicTryLoad returns +// (unspecified, false). +func SeqAtomicTryLoadParameters(sc *gvsync.SeqCount, epoch gvsync.SeqCountEpoch, ptr *Parameters) (Parameters, bool) { + var val Parameters + if gvsync.RaceEnabled { + gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + val = *ptr + } + return val, sc.ReadOk(epoch) +} + +func initParameters() { + var val Parameters + typ := reflect.TypeOf(val) + name := typ.Name() + if ptrs := gvsync.PointersInType(typ, name); len(ptrs) != 0 { + panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, name, typ, strings.Join(ptrs, "\n"))) + } +} diff --git a/pkg/sentry/time/time_state_autogen.go b/pkg/sentry/time/time_state_autogen.go new file mode 100755 index 000000000..ea614b056 --- /dev/null +++ b/pkg/sentry/time/time_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package time + diff --git a/pkg/sentry/unimpl/BUILD b/pkg/sentry/unimpl/BUILD deleted file mode 100644 index b69603da3..000000000 --- a/pkg/sentry/unimpl/BUILD +++ /dev/null @@ -1,30 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") - -package(licenses = ["notice"]) - -proto_library( - name = "unimplemented_syscall_proto", - srcs = ["unimplemented_syscall.proto"], - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_proto"], -) - -go_proto_library( - name = "unimplemented_syscall_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto", - proto = ":unimplemented_syscall_proto", - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_go_proto"], -) - -go_library( - name = "unimpl", - srcs = ["events.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/unimpl", - visibility = ["//:sandbox"], - deps = [ - "//pkg/log", - "//pkg/sentry/context", - ], -) diff --git a/pkg/sentry/unimpl/unimpl_state_autogen.go b/pkg/sentry/unimpl/unimpl_state_autogen.go new file mode 100755 index 000000000..b9d1116f3 --- /dev/null +++ b/pkg/sentry/unimpl/unimpl_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package unimpl + diff --git a/pkg/sentry/unimpl/unimplemented_syscall.proto b/pkg/sentry/unimpl/unimplemented_syscall.proto deleted file mode 100644 index 0d7a94be7..000000000 --- a/pkg/sentry/unimpl/unimplemented_syscall.proto +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package gvisor; - -import "pkg/sentry/arch/registers.proto"; - -message UnimplementedSyscall { - // Task ID. - int32 tid = 1; - - // Registers at the time of the call. - Registers registers = 2; -} diff --git a/pkg/sentry/unimpl/unimplemented_syscall_go_proto/unimplemented_syscall.pb.go b/pkg/sentry/unimpl/unimplemented_syscall_go_proto/unimplemented_syscall.pb.go new file mode 100755 index 000000000..4dfb169cc --- /dev/null +++ b/pkg/sentry/unimpl/unimplemented_syscall_go_proto/unimplemented_syscall.pb.go @@ -0,0 +1,91 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: pkg/sentry/unimpl/unimplemented_syscall.proto + +package gvisor + +import ( + fmt "fmt" + proto "github.com/golang/protobuf/proto" + registers_go_proto "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" + math "math" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package + +type UnimplementedSyscall struct { + Tid int32 `protobuf:"varint,1,opt,name=tid,proto3" json:"tid,omitempty"` + Registers *registers_go_proto.Registers `protobuf:"bytes,2,opt,name=registers,proto3" json:"registers,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *UnimplementedSyscall) Reset() { *m = UnimplementedSyscall{} } +func (m *UnimplementedSyscall) String() string { return proto.CompactTextString(m) } +func (*UnimplementedSyscall) ProtoMessage() {} +func (*UnimplementedSyscall) Descriptor() ([]byte, []int) { + return fileDescriptor_ddc2fcd2bea3c75d, []int{0} +} + +func (m *UnimplementedSyscall) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_UnimplementedSyscall.Unmarshal(m, b) +} +func (m *UnimplementedSyscall) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_UnimplementedSyscall.Marshal(b, m, deterministic) +} +func (m *UnimplementedSyscall) XXX_Merge(src proto.Message) { + xxx_messageInfo_UnimplementedSyscall.Merge(m, src) +} +func (m *UnimplementedSyscall) XXX_Size() int { + return xxx_messageInfo_UnimplementedSyscall.Size(m) +} +func (m *UnimplementedSyscall) XXX_DiscardUnknown() { + xxx_messageInfo_UnimplementedSyscall.DiscardUnknown(m) +} + +var xxx_messageInfo_UnimplementedSyscall proto.InternalMessageInfo + +func (m *UnimplementedSyscall) GetTid() int32 { + if m != nil { + return m.Tid + } + return 0 +} + +func (m *UnimplementedSyscall) GetRegisters() *registers_go_proto.Registers { + if m != nil { + return m.Registers + } + return nil +} + +func init() { + proto.RegisterType((*UnimplementedSyscall)(nil), "gvisor.UnimplementedSyscall") +} + +func init() { + proto.RegisterFile("pkg/sentry/unimpl/unimplemented_syscall.proto", fileDescriptor_ddc2fcd2bea3c75d) +} + +var fileDescriptor_ddc2fcd2bea3c75d = []byte{ + // 149 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xd2, 0x2d, 0xc8, 0x4e, 0xd7, + 0x2f, 0x4e, 0xcd, 0x2b, 0x29, 0xaa, 0xd4, 0x2f, 0xcd, 0xcb, 0xcc, 0x2d, 0xc8, 0x81, 0x52, 0xa9, + 0xb9, 0xa9, 0x79, 0x25, 0xa9, 0x29, 0xf1, 0xc5, 0x95, 0xc5, 0xc9, 0x89, 0x39, 0x39, 0x7a, 0x05, + 0x45, 0xf9, 0x25, 0xf9, 0x42, 0x6c, 0xe9, 0x65, 0x99, 0xc5, 0xf9, 0x45, 0x52, 0xf2, 0x48, 0xda, + 0x12, 0x8b, 0x92, 0x33, 0xf4, 0x8b, 0x52, 0xd3, 0x33, 0x8b, 0x4b, 0x52, 0x8b, 0x8a, 0x21, 0x0a, + 0x95, 0x22, 0xb9, 0x44, 0x42, 0x91, 0xcd, 0x09, 0x86, 0x18, 0x23, 0x24, 0xc0, 0xc5, 0x5c, 0x92, + 0x99, 0x22, 0xc1, 0xa8, 0xc0, 0xa8, 0xc1, 0x1a, 0x04, 0x62, 0x0a, 0xe9, 0x73, 0x71, 0xc2, 0x35, + 0x4b, 0x30, 0x29, 0x30, 0x6a, 0x70, 0x1b, 0x09, 0xea, 0x41, 0xac, 0xd1, 0x0b, 0x82, 0x49, 0x04, + 0x21, 0xd4, 0x24, 0xb1, 0x81, 0x6d, 0x30, 0x06, 0x04, 0x00, 0x00, 0xff, 0xff, 0x51, 0x4a, 0x47, + 0x79, 0xbb, 0x00, 0x00, 0x00, +} diff --git a/pkg/sentry/uniqueid/BUILD b/pkg/sentry/uniqueid/BUILD deleted file mode 100644 index 86a87edd4..000000000 --- a/pkg/sentry/uniqueid/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "uniqueid", - srcs = ["context.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/uniqueid", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/socket/unix/transport", - ], -) diff --git a/pkg/sentry/uniqueid/uniqueid_state_autogen.go b/pkg/sentry/uniqueid/uniqueid_state_autogen.go new file mode 100755 index 000000000..09e4327e4 --- /dev/null +++ b/pkg/sentry/uniqueid/uniqueid_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package uniqueid + diff --git a/pkg/sentry/usage/BUILD b/pkg/sentry/usage/BUILD deleted file mode 100644 index a34c39540..000000000 --- a/pkg/sentry/usage/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "usage", - srcs = [ - "cpu.go", - "io.go", - "memory.go", - "memory_unsafe.go", - "usage.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/usage", - visibility = [ - "//pkg/sentry:internal", - ], - deps = [ - "//pkg/bits", - "//pkg/memutil", - ], -) diff --git a/pkg/sentry/usage/usage_state_autogen.go b/pkg/sentry/usage/usage_state_autogen.go new file mode 100755 index 000000000..d98e3e916 --- /dev/null +++ b/pkg/sentry/usage/usage_state_autogen.go @@ -0,0 +1,50 @@ +// automatically generated by stateify. + +package usage + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *CPUStats) beforeSave() {} +func (x *CPUStats) save(m state.Map) { + x.beforeSave() + m.Save("UserTime", &x.UserTime) + m.Save("SysTime", &x.SysTime) + m.Save("VoluntarySwitches", &x.VoluntarySwitches) +} + +func (x *CPUStats) afterLoad() {} +func (x *CPUStats) load(m state.Map) { + m.Load("UserTime", &x.UserTime) + m.Load("SysTime", &x.SysTime) + m.Load("VoluntarySwitches", &x.VoluntarySwitches) +} + +func (x *IO) beforeSave() {} +func (x *IO) save(m state.Map) { + x.beforeSave() + m.Save("CharsRead", &x.CharsRead) + m.Save("CharsWritten", &x.CharsWritten) + m.Save("ReadSyscalls", &x.ReadSyscalls) + m.Save("WriteSyscalls", &x.WriteSyscalls) + m.Save("BytesRead", &x.BytesRead) + m.Save("BytesWritten", &x.BytesWritten) + m.Save("BytesWriteCancelled", &x.BytesWriteCancelled) +} + +func (x *IO) afterLoad() {} +func (x *IO) load(m state.Map) { + m.Load("CharsRead", &x.CharsRead) + m.Load("CharsWritten", &x.CharsWritten) + m.Load("ReadSyscalls", &x.ReadSyscalls) + m.Load("WriteSyscalls", &x.WriteSyscalls) + m.Load("BytesRead", &x.BytesRead) + m.Load("BytesWritten", &x.BytesWritten) + m.Load("BytesWriteCancelled", &x.BytesWriteCancelled) +} + +func init() { + state.Register("usage.CPUStats", (*CPUStats)(nil), state.Fns{Save: (*CPUStats).save, Load: (*CPUStats).load}) + state.Register("usage.IO", (*IO)(nil), state.Fns{Save: (*IO).save, Load: (*IO).load}) +} diff --git a/pkg/sentry/usermem/BUILD b/pkg/sentry/usermem/BUILD deleted file mode 100644 index a5b4206bb..000000000 --- a/pkg/sentry/usermem/BUILD +++ /dev/null @@ -1,57 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -go_template_instance( - name = "addr_range", - out = "addr_range.go", - package = "usermem", - prefix = "Addr", - template = "//pkg/segment:generic_range", - types = { - "T": "Addr", - }, -) - -go_library( - name = "usermem", - srcs = [ - "access_type.go", - "addr.go", - "addr_range.go", - "addr_range_seq_unsafe.go", - "bytes_io.go", - "bytes_io_unsafe.go", - "usermem.go", - "usermem_arm64.go", - "usermem_unsafe.go", - "usermem_x86.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/usermem", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/atomicbitops", - "//pkg/binary", - "//pkg/log", - "//pkg/sentry/context", - "//pkg/sentry/safemem", - "//pkg/syserror", - "//pkg/tcpip/buffer", - ], -) - -go_test( - name = "usermem_test", - size = "small", - srcs = [ - "addr_range_seq_test.go", - "usermem_test.go", - ], - embed = [":usermem"], - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/safemem", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/usermem/README.md b/pkg/sentry/usermem/README.md deleted file mode 100644 index f6d2137eb..000000000 --- a/pkg/sentry/usermem/README.md +++ /dev/null @@ -1,31 +0,0 @@ -This package defines primitives for sentry access to application memory. - -Major types: - -- The `IO` interface represents a virtual address space and provides I/O - methods on that address space. `IO` is the lowest-level primitive. The - primary implementation of the `IO` interface is `mm.MemoryManager`. - -- `IOSequence` represents a collection of individually-contiguous address - ranges in a `IO` that is operated on sequentially, analogous to Linux's - `struct iov_iter`. - -Major usage patterns: - -- Access to a task's virtual memory, subject to the application's memory - protections and while running on that task's goroutine, from a context that - is at or above the level of the `kernel` package (e.g. most syscall - implementations in `syscalls/linux`); use the `kernel.Task.Copy*` wrappers - defined in `kernel/task_usermem.go`. - -- Access to a task's virtual memory, from a context that is at or above the - level of the `kernel` package, but where any of the above constraints does - not hold (e.g. `PTRACE_POKEDATA`, which ignores application memory - protections); obtain the task's `mm.MemoryManager` by calling - `kernel.Task.MemoryManager`, and call its `IO` methods directly. - -- Access to a task's virtual memory, from a context that is below the level of - the `kernel` package (e.g. filesystem I/O); clients must pass I/O arguments - from higher layers, usually in the form of an `IOSequence`. The - `kernel.Task.SingleIOSequence` and `kernel.Task.IovecsIOSequence` functions - in `kernel/task_usermem.go` are convenience functions for doing so. diff --git a/pkg/sentry/usermem/addr_range.go b/pkg/sentry/usermem/addr_range.go new file mode 100755 index 000000000..152ed1434 --- /dev/null +++ b/pkg/sentry/usermem/addr_range.go @@ -0,0 +1,62 @@ +package usermem + +// A Range represents a contiguous range of T. +// +// +stateify savable +type AddrRange struct { + // Start is the inclusive start of the range. + Start Addr + + // End is the exclusive end of the range. + End Addr +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +func (r AddrRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +func (r AddrRange) Length() Addr { + return r.End - r.Start +} + +// Contains returns true if r contains x. +func (r AddrRange) Contains(x Addr) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +func (r AddrRange) Overlaps(r2 AddrRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +func (r AddrRange) IsSupersetOf(r2 AddrRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +func (r AddrRange) Intersect(r2 AddrRange) AddrRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +func (r AddrRange) CanSplitAt(x Addr) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/usermem/addr_range_seq_test.go b/pkg/sentry/usermem/addr_range_seq_test.go deleted file mode 100644 index 82f735026..000000000 --- a/pkg/sentry/usermem/addr_range_seq_test.go +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package usermem - -import ( - "testing" -) - -var addrRangeSeqTests = []struct { - desc string - ranges []AddrRange -}{ - { - desc: "Empty sequence", - }, - { - desc: "Single empty AddrRange", - ranges: []AddrRange{ - {0x10, 0x10}, - }, - }, - { - desc: "Single non-empty AddrRange of length 1", - ranges: []AddrRange{ - {0x10, 0x11}, - }, - }, - { - desc: "Single non-empty AddrRange of length 2", - ranges: []AddrRange{ - {0x10, 0x12}, - }, - }, - { - desc: "Multiple non-empty AddrRanges", - ranges: []AddrRange{ - {0x10, 0x11}, - {0x20, 0x22}, - }, - }, - { - desc: "Multiple AddrRanges including empty AddrRanges", - ranges: []AddrRange{ - {0x10, 0x10}, - {0x20, 0x20}, - {0x30, 0x33}, - {0x40, 0x44}, - {0x50, 0x50}, - {0x60, 0x60}, - {0x70, 0x77}, - {0x80, 0x88}, - {0x90, 0x90}, - {0xa0, 0xa0}, - }, - }, -} - -func testAddrRangeSeqEqualityWithTailIteration(t *testing.T, ars AddrRangeSeq, wantRanges []AddrRange) { - var wantLen int64 - for _, ar := range wantRanges { - wantLen += int64(ar.Length()) - } - - var i int - for !ars.IsEmpty() { - if gotLen := ars.NumBytes(); gotLen != wantLen { - t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d", i, ars, gotLen, wantLen) - } - if gotN, wantN := ars.NumRanges(), len(wantRanges)-i; gotN != wantN { - t.Errorf("Iteration %d: %v.NumRanges(): got %d, wanted %d", i, ars, gotN, wantN) - } - got := ars.Head() - if i >= len(wantRanges) { - t.Errorf("Iteration %d: %v.Head(): got %s, wanted <end of sequence>", i, ars, got) - } else if want := wantRanges[i]; got != want { - t.Errorf("Iteration %d: %v.Head(): got %s, wanted %s", i, ars, got, want) - } - ars = ars.Tail() - wantLen -= int64(got.Length()) - i++ - } - if gotLen := ars.NumBytes(); gotLen != 0 || wantLen != 0 { - t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d (which should be 0)", i, ars, gotLen, wantLen) - } - if gotN := ars.NumRanges(); gotN != 0 { - t.Errorf("Iteration %d: %v.NumRanges(): got %d, wanted 0", i, ars, gotN) - } -} - -func TestAddrRangeSeqTailIteration(t *testing.T) { - for _, test := range addrRangeSeqTests { - t.Run(test.desc, func(t *testing.T) { - testAddrRangeSeqEqualityWithTailIteration(t, AddrRangeSeqFromSlice(test.ranges), test.ranges) - }) - } -} - -func TestAddrRangeSeqDropFirstEmpty(t *testing.T) { - var ars AddrRangeSeq - if got, want := ars.DropFirst(1), ars; got != want { - t.Errorf("%v.DropFirst(1): got %v, wanted %v", ars, got, want) - } -} - -func TestAddrRangeSeqDropSingleByteIteration(t *testing.T) { - // Tests AddrRangeSeq iteration using Head/DropFirst, simulating - // I/O-per-AddrRange. - for _, test := range addrRangeSeqTests { - t.Run(test.desc, func(t *testing.T) { - // Figure out what AddrRanges we expect to see. - var wantLen int64 - var wantRanges []AddrRange - for _, ar := range test.ranges { - wantLen += int64(ar.Length()) - wantRanges = append(wantRanges, ar) - if ar.Length() == 0 { - // We "do" 0 bytes of I/O and then call DropFirst(0), - // advancing to the next AddrRange. - continue - } - // Otherwise we "do" 1 byte of I/O and then call DropFirst(1), - // advancing the AddrRange by 1 byte, or to the next AddrRange - // if this one is exhausted. - for ar.Start++; ar.Length() != 0; ar.Start++ { - wantRanges = append(wantRanges, ar) - } - } - t.Logf("Expected AddrRanges: %s (%d bytes)", wantRanges, wantLen) - - ars := AddrRangeSeqFromSlice(test.ranges) - var i int - for !ars.IsEmpty() { - if gotLen := ars.NumBytes(); gotLen != wantLen { - t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d", i, ars, gotLen, wantLen) - } - got := ars.Head() - if i >= len(wantRanges) { - t.Errorf("Iteration %d: %v.Head(): got %s, wanted <end of sequence>", i, ars, got) - } else if want := wantRanges[i]; got != want { - t.Errorf("Iteration %d: %v.Head(): got %s, wanted %s", i, ars, got, want) - } - if got.Length() == 0 { - ars = ars.DropFirst(0) - } else { - ars = ars.DropFirst(1) - wantLen-- - } - i++ - } - if gotLen := ars.NumBytes(); gotLen != 0 || wantLen != 0 { - t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d (which should be 0)", i, ars, gotLen, wantLen) - } - }) - } -} - -func TestAddrRangeSeqTakeFirstEmpty(t *testing.T) { - var ars AddrRangeSeq - if got, want := ars.TakeFirst(1), ars; got != want { - t.Errorf("%v.TakeFirst(1): got %v, wanted %v", ars, got, want) - } -} - -func TestAddrRangeSeqTakeFirst(t *testing.T) { - ranges := []AddrRange{ - {0x10, 0x11}, - {0x20, 0x22}, - {0x30, 0x30}, - {0x40, 0x44}, - {0x50, 0x55}, - {0x60, 0x60}, - {0x70, 0x77}, - } - ars := AddrRangeSeqFromSlice(ranges).TakeFirst(5) - want := []AddrRange{ - {0x10, 0x11}, // +1 byte (total 1 byte), not truncated - {0x20, 0x22}, // +2 bytes (total 3 bytes), not truncated - {0x30, 0x30}, // +0 bytes (total 3 bytes), no change - {0x40, 0x42}, // +2 bytes (total 5 bytes), partially truncated - {0x50, 0x50}, // +0 bytes (total 5 bytes), fully truncated - {0x60, 0x60}, // +0 bytes (total 5 bytes), "fully truncated" (no change) - {0x70, 0x70}, // +0 bytes (total 5 bytes), fully truncated - } - testAddrRangeSeqEqualityWithTailIteration(t, ars, want) -} diff --git a/pkg/sentry/usermem/usermem_state_autogen.go b/pkg/sentry/usermem/usermem_state_autogen.go new file mode 100755 index 000000000..39b92d108 --- /dev/null +++ b/pkg/sentry/usermem/usermem_state_autogen.go @@ -0,0 +1,49 @@ +// automatically generated by stateify. + +package usermem + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (x *AccessType) beforeSave() {} +func (x *AccessType) save(m state.Map) { + x.beforeSave() + m.Save("Read", &x.Read) + m.Save("Write", &x.Write) + m.Save("Execute", &x.Execute) +} + +func (x *AccessType) afterLoad() {} +func (x *AccessType) load(m state.Map) { + m.Load("Read", &x.Read) + m.Load("Write", &x.Write) + m.Load("Execute", &x.Execute) +} + +func (x *Addr) save(m state.Map) { + m.SaveValue("", (uintptr)(*x)) +} + +func (x *Addr) load(m state.Map) { + m.LoadValue("", new(uintptr), func(y interface{}) { *x = (Addr)(y.(uintptr)) }) +} + +func (x *AddrRange) beforeSave() {} +func (x *AddrRange) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) +} + +func (x *AddrRange) afterLoad() {} +func (x *AddrRange) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) +} + +func init() { + state.Register("usermem.AccessType", (*AccessType)(nil), state.Fns{Save: (*AccessType).save, Load: (*AccessType).load}) + state.Register("usermem.Addr", (*Addr)(nil), state.Fns{Save: (*Addr).save, Load: (*Addr).load}) + state.Register("usermem.AddrRange", (*AddrRange)(nil), state.Fns{Save: (*AddrRange).save, Load: (*AddrRange).load}) +} diff --git a/pkg/sentry/usermem/usermem_test.go b/pkg/sentry/usermem/usermem_test.go deleted file mode 100644 index 299f64754..000000000 --- a/pkg/sentry/usermem/usermem_test.go +++ /dev/null @@ -1,424 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package usermem - -import ( - "bytes" - "encoding/binary" - "fmt" - "reflect" - "strings" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/syserror" -) - -// newContext returns a context.Context that we can use in these tests (we -// can't use contexttest because it depends on usermem). -func newContext() context.Context { - return context.Background() -} - -func newBytesIOString(s string) *BytesIO { - return &BytesIO{[]byte(s)} -} - -func TestBytesIOCopyOutSuccess(t *testing.T) { - b := newBytesIOString("ABCDE") - n, err := b.CopyOut(newContext(), 1, []byte("foo"), IOOpts{}) - if wantN := 3; n != wantN || err != nil { - t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := b.Bytes, []byte("AfooE"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyOutFailure(t *testing.T) { - b := newBytesIOString("ABC") - n, err := b.CopyOut(newContext(), 1, []byte("foo"), IOOpts{}) - if wantN, wantErr := 2, syserror.EFAULT; n != wantN || err != wantErr { - t.Errorf("CopyOut: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) - } - if got, want := b.Bytes, []byte("Afo"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyInSuccess(t *testing.T) { - b := newBytesIOString("AfooE") - var dst [3]byte - n, err := b.CopyIn(newContext(), 1, dst[:], IOOpts{}) - if wantN := 3; n != wantN || err != nil { - t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst[:], []byte("foo"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyInFailure(t *testing.T) { - b := newBytesIOString("Afo") - var dst [3]byte - n, err := b.CopyIn(newContext(), 1, dst[:], IOOpts{}) - if wantN, wantErr := 2, syserror.EFAULT; n != wantN || err != wantErr { - t.Errorf("CopyIn: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) - } - if got, want := dst[:], []byte("fo\x00"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} - -func TestBytesIOZeroOutSuccess(t *testing.T) { - b := newBytesIOString("ABCD") - n, err := b.ZeroOut(newContext(), 1, 2, IOOpts{}) - if wantN := int64(2); n != wantN || err != nil { - t.Errorf("ZeroOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := b.Bytes, []byte("A\x00\x00D"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOZeroOutFailure(t *testing.T) { - b := newBytesIOString("ABC") - n, err := b.ZeroOut(newContext(), 1, 3, IOOpts{}) - if wantN, wantErr := int64(2), syserror.EFAULT; n != wantN || err != wantErr { - t.Errorf("ZeroOut: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) - } - if got, want := b.Bytes, []byte("A\x00\x00"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyOutFromSuccess(t *testing.T) { - b := newBytesIOString("ABCDEFGH") - n, err := b.CopyOutFrom(newContext(), AddrRangeSeqFromSlice([]AddrRange{ - {Start: 4, End: 7}, - {Start: 1, End: 4}, - }), safemem.FromIOReader{bytes.NewBufferString("barfoo")}, IOOpts{}) - if wantN := int64(6); n != wantN || err != nil { - t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := b.Bytes, []byte("AfoobarH"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyOutFromFailure(t *testing.T) { - b := newBytesIOString("ABCDE") - n, err := b.CopyOutFrom(newContext(), AddrRangeSeqFromSlice([]AddrRange{ - {Start: 1, End: 4}, - {Start: 4, End: 7}, - }), safemem.FromIOReader{bytes.NewBufferString("foobar")}, IOOpts{}) - if wantN, wantErr := int64(4), syserror.EFAULT; n != wantN || err != wantErr { - t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) - } - if got, want := b.Bytes, []byte("Afoob"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyInToSuccess(t *testing.T) { - b := newBytesIOString("AfoobarH") - var dst bytes.Buffer - n, err := b.CopyInTo(newContext(), AddrRangeSeqFromSlice([]AddrRange{ - {Start: 4, End: 7}, - {Start: 1, End: 4}, - }), safemem.FromIOWriter{&dst}, IOOpts{}) - if wantN := int64(6); n != wantN || err != nil { - t.Errorf("CopyInTo: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst.Bytes(), []byte("barfoo"); !bytes.Equal(got, want) { - t.Errorf("dst.Bytes(): got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyInToFailure(t *testing.T) { - b := newBytesIOString("Afoob") - var dst bytes.Buffer - n, err := b.CopyInTo(newContext(), AddrRangeSeqFromSlice([]AddrRange{ - {Start: 1, End: 4}, - {Start: 4, End: 7}, - }), safemem.FromIOWriter{&dst}, IOOpts{}) - if wantN, wantErr := int64(4), syserror.EFAULT; n != wantN || err != wantErr { - t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) - } - if got, want := dst.Bytes(), []byte("foob"); !bytes.Equal(got, want) { - t.Errorf("dst.Bytes(): got %q, wanted %q", got, want) - } -} - -type testStruct struct { - Int8 int8 - Uint8 uint8 - Int16 int16 - Uint16 uint16 - Int32 int32 - Uint32 uint32 - Int64 int64 - Uint64 uint64 -} - -func TestCopyObject(t *testing.T) { - wantObj := testStruct{1, 2, 3, 4, 5, 6, 7, 8} - wantN := binary.Size(wantObj) - b := &BytesIO{make([]byte, wantN)} - ctx := newContext() - if n, err := CopyObjectOut(ctx, b, 0, &wantObj, IOOpts{}); n != wantN || err != nil { - t.Fatalf("CopyObjectOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - var gotObj testStruct - if n, err := CopyObjectIn(ctx, b, 0, &gotObj, IOOpts{}); n != wantN || err != nil { - t.Errorf("CopyObjectIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if gotObj != wantObj { - t.Errorf("CopyObject round trip: got %+v, wanted %+v", gotObj, wantObj) - } -} - -func TestCopyStringInShort(t *testing.T) { - // Tests for string length <= copyStringIncrement. - want := strings.Repeat("A", copyStringIncrement-2) - mem := want + "\x00" - if got, err := CopyStringIn(newContext(), newBytesIOString(mem), 0, 2*copyStringIncrement, IOOpts{}); got != want || err != nil { - t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, nil)", got, err, want) - } -} - -func TestCopyStringInLong(t *testing.T) { - // Tests for copyStringIncrement < string length <= copyStringMaxInitBufLen - // (requiring multiple calls to IO.CopyIn()). - want := strings.Repeat("A", copyStringIncrement*3/4) + strings.Repeat("B", copyStringIncrement*3/4) - mem := want + "\x00" - if got, err := CopyStringIn(newContext(), newBytesIOString(mem), 0, 2*copyStringIncrement, IOOpts{}); got != want || err != nil { - t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, nil)", got, err, want) - } -} - -func TestCopyStringInVeryLong(t *testing.T) { - // Tests for string length > copyStringMaxInitBufLen (requiring buffer - // reallocation). - want := strings.Repeat("A", copyStringMaxInitBufLen*3/4) + strings.Repeat("B", copyStringMaxInitBufLen*3/4) - mem := want + "\x00" - if got, err := CopyStringIn(newContext(), newBytesIOString(mem), 0, 2*copyStringMaxInitBufLen, IOOpts{}); got != want || err != nil { - t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, nil)", got, err, want) - } -} - -func TestCopyStringInNoTerminatingZeroByte(t *testing.T) { - want := strings.Repeat("A", copyStringIncrement-1) - got, err := CopyStringIn(newContext(), newBytesIOString(want), 0, 2*copyStringIncrement, IOOpts{}) - if wantErr := syserror.EFAULT; got != want || err != wantErr { - t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, %v)", got, err, want, wantErr) - } -} - -func TestCopyStringInTruncatedByMaxlen(t *testing.T) { - got, err := CopyStringIn(newContext(), newBytesIOString(strings.Repeat("A", 10)), 0, 5, IOOpts{}) - if want, wantErr := strings.Repeat("A", 5), syserror.ENAMETOOLONG; got != want || err != wantErr { - t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, %v)", got, err, want, wantErr) - } -} - -func TestCopyInt32StringsInVec(t *testing.T) { - for _, test := range []struct { - str string - n int - initial []int32 - final []int32 - }{ - { - str: "100 200", - n: len("100 200"), - initial: []int32{1, 2}, - final: []int32{100, 200}, - }, - { - // Fewer values ok - str: "100", - n: len("100"), - initial: []int32{1, 2}, - final: []int32{100, 2}, - }, - { - // Extra values ok - str: "100 200 300", - n: len("100 200 "), - initial: []int32{1, 2}, - final: []int32{100, 200}, - }, - { - // Leading and trailing whitespace ok - str: " 100\t200\n", - n: len(" 100\t200\n"), - initial: []int32{1, 2}, - final: []int32{100, 200}, - }, - } { - t.Run(fmt.Sprintf("%q", test.str), func(t *testing.T) { - src := BytesIOSequence([]byte(test.str)) - dsts := append([]int32(nil), test.initial...) - if n, err := CopyInt32StringsInVec(newContext(), src.IO, src.Addrs, dsts, src.Opts); n != int64(test.n) || err != nil { - t.Errorf("CopyInt32StringsInVec: got (%d, %v), wanted (%d, nil)", n, err, test.n) - } - if !reflect.DeepEqual(dsts, test.final) { - t.Errorf("dsts: got %v, wanted %v", dsts, test.final) - } - }) - } -} - -func TestCopyInt32StringsInVecRequiresOneValidValue(t *testing.T) { - for _, s := range []string{"", "\n", "a123"} { - t.Run(fmt.Sprintf("%q", s), func(t *testing.T) { - src := BytesIOSequence([]byte(s)) - initial := []int32{1, 2} - dsts := append([]int32(nil), initial...) - if n, err := CopyInt32StringsInVec(newContext(), src.IO, src.Addrs, dsts, src.Opts); err != syserror.EINVAL { - t.Errorf("CopyInt32StringsInVec: got (%d, %v), wanted (_, %v)", n, err, syserror.EINVAL) - } - if !reflect.DeepEqual(dsts, initial) { - t.Errorf("dsts: got %v, wanted %v", dsts, initial) - } - }) - } -} - -func TestIOSequenceCopyOut(t *testing.T) { - buf := []byte("ABCD") - s := BytesIOSequence(buf) - - // CopyOut limited by len(src). - n, err := s.CopyOut(newContext(), []byte("fo")) - if wantN := 2; n != wantN || err != nil { - t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("foCD"); !bytes.Equal(buf, want) { - t.Errorf("buf: got %q, wanted %q", buf, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(2); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - // CopyOut limited by s.NumBytes(). - n, err = s.CopyOut(newContext(), []byte("obar")) - if wantN := 2; n != wantN || err != nil { - t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("foob"); !bytes.Equal(buf, want) { - t.Errorf("buf: got %q, wanted %q", buf, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(0); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } -} - -func TestIOSequenceCopyIn(t *testing.T) { - s := BytesIOSequence([]byte("foob")) - dst := []byte("ABCDEF") - - // CopyIn limited by len(dst). - n, err := s.CopyIn(newContext(), dst[:2]) - if wantN := 2; n != wantN || err != nil { - t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("foCDEF"); !bytes.Equal(dst, want) { - t.Errorf("dst: got %q, wanted %q", dst, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(2); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - // CopyIn limited by s.Remaining(). - n, err = s.CopyIn(newContext(), dst[2:]) - if wantN := 2; n != wantN || err != nil { - t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("foobEF"); !bytes.Equal(dst, want) { - t.Errorf("dst: got %q, wanted %q", dst, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(0); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } -} - -func TestIOSequenceZeroOut(t *testing.T) { - buf := []byte("ABCD") - s := BytesIOSequence(buf) - - // ZeroOut limited by toZero. - n, err := s.ZeroOut(newContext(), 2) - if wantN := int64(2); n != wantN || err != nil { - t.Errorf("ZeroOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("\x00\x00CD"); !bytes.Equal(buf, want) { - t.Errorf("buf: got %q, wanted %q", buf, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(2); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - // ZeroOut limited by s.NumBytes(). - n, err = s.ZeroOut(newContext(), 4) - if wantN := int64(2); n != wantN || err != nil { - t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("\x00\x00\x00\x00"); !bytes.Equal(buf, want) { - t.Errorf("buf: got %q, wanted %q", buf, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(0); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } -} - -func TestIOSequenceTakeFirst(t *testing.T) { - s := BytesIOSequence([]byte("foobar")) - if got, want := s.NumBytes(), int64(6); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - s = s.TakeFirst(3) - if got, want := s.NumBytes(), int64(3); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - // TakeFirst(n) where n > s.NumBytes() is a no-op. - s = s.TakeFirst(9) - if got, want := s.NumBytes(), int64(3); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - var dst [3]byte - n, err := s.CopyIn(newContext(), dst[:]) - if wantN := 3; n != wantN || err != nil { - t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst[:], []byte("foo"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } - s = s.DropFirst(3) - if got, want := s.NumBytes(), int64(0); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } -} diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD deleted file mode 100644 index 4de6c41cf..000000000 --- a/pkg/sentry/vfs/BUILD +++ /dev/null @@ -1,46 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "vfs", - srcs = [ - "context.go", - "debug.go", - "dentry.go", - "file_description.go", - "file_description_impl_util.go", - "filesystem.go", - "filesystem_type.go", - "mount.go", - "mount_unsafe.go", - "options.go", - "permissions.go", - "resolving_path.go", - "syscalls.go", - "vfs.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/vfs", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/fspath", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - "//third_party/gvsync", - ], -) - -go_test( - name = "vfs_test", - size = "small", - srcs = [ - "mount_test.go", - ], - embed = [":vfs"], -) diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md deleted file mode 100644 index 7847854bc..000000000 --- a/pkg/sentry/vfs/README.md +++ /dev/null @@ -1,197 +0,0 @@ -# The gVisor Virtual Filesystem - -THIS PACKAGE IS CURRENTLY EXPERIMENTAL AND NOT READY OR ENABLED FOR PRODUCTION -USE. For the filesystem implementation currently used by gVisor, see the `fs` -package. - -## Implementation Notes - -### Reference Counting - -Filesystem, Dentry, Mount, MountNamespace, and FileDescription are all -reference-counted. Mount and MountNamespace are exclusively VFS-managed; when -their reference count reaches zero, VFS releases their resources. Filesystem and -FileDescription management is shared between VFS and filesystem implementations; -when their reference count reaches zero, VFS notifies the implementation by -calling `FilesystemImpl.Release()` or `FileDescriptionImpl.Release()` -respectively and then releases VFS-owned resources. Dentries are exclusively -managed by filesystem implementations; reference count changes are abstracted -through DentryImpl, which should release resources when reference count reaches -zero. - -Filesystem references are held by: - -- Mount: Each referenced Mount holds a reference on the mounted Filesystem. - -Dentry references are held by: - -- FileDescription: Each referenced FileDescription holds a reference on the - Dentry through which it was opened, via `FileDescription.vd.dentry`. - -- Mount: Each referenced Mount holds a reference on its mount point and on the - mounted filesystem root. The mount point is mutable (`mount(MS_MOVE)`). - -Mount references are held by: - -- FileDescription: Each referenced FileDescription holds a reference on the - Mount on which it was opened, via `FileDescription.vd.mount`. - -- Mount: Each referenced Mount holds a reference on its parent, which is the - mount containing its mount point. - -- VirtualFilesystem: A reference is held on all Mounts that are attached - (reachable by Mount traversal). - -MountNamespace and FileDescription references are held by users of VFS. The -expectation is that each `kernel.Task` holds a reference on its corresponding -MountNamespace, and each file descriptor holds a reference on its represented -FileDescription. - -Notes: - -- Dentries do not hold a reference on their owning Filesystem. Instead, all - uses of a Dentry occur in the context of a Mount, which holds a reference on - the relevant Filesystem (see e.g. the VirtualDentry type). As a corollary, - when releasing references on both a Dentry and its corresponding Mount, the - Dentry's reference must be released first (because releasing the Mount's - reference may release the last reference on the Filesystem, whose state may - be required to release the Dentry reference). - -### The Inheritance Pattern - -Filesystem, Dentry, and FileDescription are all concepts featuring both state -that must be shared between VFS and filesystem implementations, and operations -that are implementation-defined. To facilitate this, each of these three -concepts follows the same pattern, shown below for Dentry: - -```go -// Dentry represents a node in a filesystem tree. -type Dentry struct { - // VFS-required dentry state. - parent *Dentry - // ... - - // impl is the DentryImpl associated with this Dentry. impl is immutable. - // This should be the last field in Dentry. - impl DentryImpl -} - -// Init must be called before first use of d. -func (d *Dentry) Init(impl DentryImpl) { - d.impl = impl -} - -// Impl returns the DentryImpl associated with d. -func (d *Dentry) Impl() DentryImpl { - return d.impl -} - -// DentryImpl contains implementation-specific details of a Dentry. -// Implementations of DentryImpl should contain their associated Dentry by -// value as their first field. -type DentryImpl interface { - // VFS-required implementation-defined dentry operations. - IncRef() - // ... -} -``` - -This construction, which is essentially a type-safe analogue to Linux's -`container_of` pattern, has the following properties: - -- VFS works almost exclusively with pointers to Dentry rather than DentryImpl - interface objects, such as in the type of `Dentry.parent`. This avoids - interface method calls (which are somewhat expensive to perform, and defeat - inlining and escape analysis), reduces the size of VFS types (since an - interface object is two pointers in size), and allows pointers to be loaded - and stored atomically using `sync/atomic`. Implementation-defined behavior - is accessed via `Dentry.impl` when required. - -- Filesystem implementations can access the implementation-defined state - associated with objects of VFS types by type-asserting or type-switching - (e.g. `Dentry.Impl().(*myDentry)`). Type assertions to a concrete type - require only an equality comparison of the interface object's type pointer - to a static constant, and are consequently very fast. - -- Filesystem implementations can access the VFS state associated with objects - of implementation-defined types directly. - -- VFS and implementation-defined state for a given type occupy the same - object, minimizing memory allocations and maximizing memory locality. `impl` - is the last field in `Dentry`, and `Dentry` is the first field in - `DentryImpl` implementations, for similar reasons: this tends to cause - fetching of the `Dentry.impl` interface object to also fetch `DentryImpl` - fields, either because they are in the same cache line or via next-line - prefetching. - -## Future Work - -- Most `mount(2)` features, and unmounting, are incomplete. - -- VFS1 filesystems are not directly compatible with VFS2. It may be possible - to implement shims that implement `vfs.FilesystemImpl` for - `fs.MountNamespace`, `vfs.DentryImpl` for `fs.Dirent`, and - `vfs.FileDescriptionImpl` for `fs.File`, which may be adequate for - filesystems that are not performance-critical (e.g. sysfs); however, it is - not clear that this will be less effort than simply porting the filesystems - in question. Practically speaking, the following filesystems will probably - need to be ported or made compatible through a shim to evaluate filesystem - performance on realistic workloads: - - - devfs/procfs/sysfs, which will realistically be necessary to execute - most applications. (Note that procfs and sysfs do not support hard - links, so they do not require the complexity of separate inode objects. - Also note that Linux's /dev is actually a variant of tmpfs called - devtmpfs.) - - - tmpfs. This should be relatively straightforward: copy/paste memfs, - store regular file contents in pgalloc-allocated memory instead of - `[]byte`, and add support for file timestamps. (In fact, it probably - makes more sense to convert memfs to tmpfs and not keep the former.) - - - A remote filesystem, either lisafs (if it is ready by the time that - other benchmarking prerequisites are) or v9fs (aka 9P, aka gofers). - - - epoll files. - - Filesystems that will need to be ported before switching to VFS2, but can - probably be skipped for early testing: - - - overlayfs, which is needed for (at least) synthetic mount points. - - - Support for host ttys. - - - timerfd files. - - Filesystems that can be probably dropped: - - - ashmem, which is far too incomplete to use. - - - binder, which is similarly far too incomplete to use. - - - whitelistfs, which we are already actively attempting to remove. - -- Save/restore. For instance, it is unclear if the current implementation of - the `state` package supports the inheritance pattern described above. - -- Many features that were previously implemented by VFS must now be - implemented by individual filesystems (though, in most cases, this should - consist of calls to hooks or libraries provided by `vfs` or other packages). - This includes, but is not necessarily limited to: - - - Block and character device special files - - - Inotify - - - File locking - - - `O_ASYNC` - -- Reference counts in the `vfs` package do not use the `refs` package since - `refs.AtomicRefCount` adds 64 bytes of overhead to each 8-byte reference - count, resulting in considerable cache bloat. 24 bytes of this overhead is - for weak reference support, which have poor performance and will not be used - by VFS2. The remaining 40 bytes is to store a descriptive string and stack - trace for reference leak checking; we can support reference leak checking - without incurring this space overhead by including the applicable - information directly in finalizers for applicable types. diff --git a/pkg/sentry/vfs/context.go b/pkg/sentry/vfs/context.go deleted file mode 100644 index 32cf9151b..000000000 --- a/pkg/sentry/vfs/context.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "gvisor.dev/gvisor/pkg/sentry/context" -) - -// contextID is this package's type for context.Context.Value keys. -type contextID int - -const ( - // CtxMountNamespace is a Context.Value key for a MountNamespace. - CtxMountNamespace contextID = iota -) - -// MountNamespaceFromContext returns the MountNamespace used by ctx. It does -// not take a reference on the returned MountNamespace. If ctx is not -// associated with a MountNamespace, MountNamespaceFromContext returns nil. -func MountNamespaceFromContext(ctx context.Context) *MountNamespace { - if v := ctx.Value(CtxMountNamespace); v != nil { - return v.(*MountNamespace) - } - return nil -} diff --git a/pkg/sentry/vfs/debug.go b/pkg/sentry/vfs/debug.go deleted file mode 100644 index 0ed20f249..000000000 --- a/pkg/sentry/vfs/debug.go +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -const ( - // If checkInvariants is true, perform runtime checks for invariants - // expected by the vfs package. This is normally disabled since VFS is - // often a hot path. - checkInvariants = false -) diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go deleted file mode 100644 index 45912fc58..000000000 --- a/pkg/sentry/vfs/dentry.go +++ /dev/null @@ -1,347 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "fmt" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/syserror" -) - -// Dentry represents a node in a Filesystem tree which may represent a file. -// -// Dentries are reference-counted. Unless otherwise specified, all Dentry -// methods require that a reference is held. -// -// A Dentry transitions through up to 3 different states through its lifetime: -// -// - Dentries are initially "independent". Independent Dentries have no parent, -// and consequently no name. -// -// - Dentry.InsertChild() causes an independent Dentry to become a "child" of -// another Dentry. A child node has a parent node, and a name in that parent, -// both of which are mutable by DentryMoveChild(). Each child Dentry's name is -// unique within its parent. -// -// - Dentry.RemoveChild() causes a child Dentry to become "disowned". A -// disowned Dentry can still refer to its former parent and its former name in -// said parent, but the disowned Dentry is no longer reachable from its parent, -// and a new Dentry with the same name may become a child of the parent. (This -// is analogous to a struct dentry being "unhashed" in Linux.) -// -// Dentry is loosely analogous to Linux's struct dentry, but: -// -// - VFS does not associate Dentries with inodes. gVisor interacts primarily -// with filesystems that are accessed through filesystem APIs (as opposed to -// raw block devices); many such APIs support only paths and file descriptors, -// and not inodes. Furthermore, when parties outside the scope of VFS can -// rename inodes on such filesystems, VFS generally cannot "follow" the rename, -// both due to synchronization issues and because it may not even be able to -// name the destination path; this implies that it would in fact be *incorrect* -// for Dentries to be associated with inodes on such filesystems. Consequently, -// operations that are inode operations in Linux are FilesystemImpl methods -// and/or FileDescriptionImpl methods in gVisor's VFS. Filesystems that do -// support inodes may store appropriate state in implementations of DentryImpl. -// -// - VFS does not provide synchronization for mutable Dentry fields, other than -// mount-related ones. -// -// - VFS does not require that Dentries are instantiated for all paths accessed -// through VFS, only those that are tracked beyond the scope of a single -// Filesystem operation. This includes file descriptions, mount points, mount -// roots, process working directories, and chroots. This avoids instantiation -// of Dentries for operations on mutable remote filesystems that can't actually -// cache any state in the Dentry. -// -// - For the reasons above, VFS is not directly responsible for managing Dentry -// lifetime. Dentry reference counts only indicate the extent to which VFS -// requires Dentries to exist; Filesystems may elect to cache or discard -// Dentries with zero references. -type Dentry struct { - // parent is this Dentry's parent in this Filesystem. If this Dentry is - // independent, parent is nil. - parent *Dentry - - // name is this Dentry's name in parent. - name string - - flags uint32 - - // mounts is the number of Mounts for which this Dentry is Mount.point. - // mounts is accessed using atomic memory operations. - mounts uint32 - - // children are child Dentries. - children map[string]*Dentry - - // impl is the DentryImpl associated with this Dentry. impl is immutable. - // This should be the last field in Dentry. - impl DentryImpl -} - -const ( - // dflagsDisownedMask is set in Dentry.flags if the Dentry has been - // disowned. - dflagsDisownedMask = 1 << iota -) - -// Init must be called before first use of d. -func (d *Dentry) Init(impl DentryImpl) { - d.impl = impl -} - -// Impl returns the DentryImpl associated with d. -func (d *Dentry) Impl() DentryImpl { - return d.impl -} - -// DentryImpl contains implementation details for a Dentry. Implementations of -// DentryImpl should contain their associated Dentry by value as their first -// field. -type DentryImpl interface { - // IncRef increments the Dentry's reference count. A Dentry with a non-zero - // reference count must remain coherent with the state of the filesystem. - IncRef(fs *Filesystem) - - // TryIncRef increments the Dentry's reference count and returns true. If - // the Dentry's reference count is zero, TryIncRef may do nothing and - // return false. (It is also permitted to succeed if it can restore the - // guarantee that the Dentry is coherent with the state of the filesystem.) - // - // TryIncRef does not require that a reference is held on the Dentry. - TryIncRef(fs *Filesystem) bool - - // DecRef decrements the Dentry's reference count. - DecRef(fs *Filesystem) -} - -// IsDisowned returns true if d is disowned. -func (d *Dentry) IsDisowned() bool { - return atomic.LoadUint32(&d.flags)&dflagsDisownedMask != 0 -} - -// Preconditions: !d.IsDisowned(). -func (d *Dentry) setDisowned() { - atomic.AddUint32(&d.flags, dflagsDisownedMask) -} - -func (d *Dentry) isMounted() bool { - return atomic.LoadUint32(&d.mounts) != 0 -} - -func (d *Dentry) incRef(fs *Filesystem) { - d.impl.IncRef(fs) -} - -func (d *Dentry) tryIncRef(fs *Filesystem) bool { - return d.impl.TryIncRef(fs) -} - -func (d *Dentry) decRef(fs *Filesystem) { - d.impl.DecRef(fs) -} - -// These functions are exported so that filesystem implementations can use -// them. The vfs package, and users of VFS, should not call these functions. -// Unless otherwise specified, these methods require that there are no -// concurrent mutators of d. - -// Name returns d's name in its parent in its owning Filesystem. If d is -// independent, Name returns an empty string. -func (d *Dentry) Name() string { - return d.name -} - -// Parent returns d's parent in its owning Filesystem. It does not take a -// reference on the returned Dentry. If d is independent, Parent returns nil. -func (d *Dentry) Parent() *Dentry { - return d.parent -} - -// ParentOrSelf is equivalent to Parent, but returns d if d is independent. -func (d *Dentry) ParentOrSelf() *Dentry { - if d.parent == nil { - return d - } - return d.parent -} - -// Child returns d's child with the given name in its owning Filesystem. It -// does not take a reference on the returned Dentry. If no such child exists, -// Child returns nil. -func (d *Dentry) Child(name string) *Dentry { - return d.children[name] -} - -// HasChildren returns true if d has any children. -func (d *Dentry) HasChildren() bool { - return len(d.children) != 0 -} - -// InsertChild makes child a child of d with the given name. -// -// InsertChild is a mutator of d and child. -// -// Preconditions: child must be an independent Dentry. d and child must be from -// the same Filesystem. d must not already have a child with the given name. -func (d *Dentry) InsertChild(child *Dentry, name string) { - if checkInvariants { - if _, ok := d.children[name]; ok { - panic(fmt.Sprintf("parent already contains a child named %q", name)) - } - if child.parent != nil || child.name != "" { - panic(fmt.Sprintf("child is not independent: parent = %v, name = %q", child.parent, child.name)) - } - } - if d.children == nil { - d.children = make(map[string]*Dentry) - } - d.children[name] = child - child.parent = d - child.name = name -} - -// PrepareDeleteDentry must be called before attempting to delete the file -// represented by d. If PrepareDeleteDentry succeeds, the caller must call -// AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome. -// -// Preconditions: d is a child Dentry. -func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error { - if checkInvariants { - if d.parent == nil { - panic("d is independent") - } - if d.IsDisowned() { - panic("d is already disowned") - } - } - vfs.mountMu.RLock() - if _, ok := mntns.mountpoints[d]; ok { - vfs.mountMu.RUnlock() - return syserror.EBUSY - } - // Return with vfs.mountMu locked, which will be unlocked by - // AbortDeleteDentry or CommitDeleteDentry. - return nil -} - -// AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion -// fails. -func (vfs *VirtualFilesystem) AbortDeleteDentry() { - vfs.mountMu.RUnlock() -} - -// CommitDeleteDentry must be called after the file represented by d is -// deleted, and causes d to become disowned. -// -// Preconditions: PrepareDeleteDentry was previously called on d. -func (vfs *VirtualFilesystem) CommitDeleteDentry(d *Dentry) { - delete(d.parent.children, d.name) - d.setDisowned() - // TODO: lazily unmount mounts at d - vfs.mountMu.RUnlock() -} - -// DeleteDentry combines PrepareDeleteDentry and CommitDeleteDentry, as -// appropriate for in-memory filesystems that don't need to ensure that some -// external state change succeeds before committing the deletion. -func (vfs *VirtualFilesystem) DeleteDentry(mntns *MountNamespace, d *Dentry) error { - if err := vfs.PrepareDeleteDentry(mntns, d); err != nil { - return err - } - vfs.CommitDeleteDentry(d) - return nil -} - -// PrepareRenameDentry must be called before attempting to rename the file -// represented by from. If to is not nil, it represents the file that will be -// replaced or exchanged by the rename. If PrepareRenameDentry succeeds, the -// caller must call AbortRenameDentry, CommitRenameReplaceDentry, or -// CommitRenameExchangeDentry depending on the rename's outcome. -// -// Preconditions: from is a child Dentry. If to is not nil, it must be a child -// Dentry from the same Filesystem. -func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error { - if checkInvariants { - if from.parent == nil { - panic("from is independent") - } - if from.IsDisowned() { - panic("from is already disowned") - } - if to != nil { - if to.parent == nil { - panic("to is independent") - } - if to.IsDisowned() { - panic("to is already disowned") - } - } - } - vfs.mountMu.RLock() - if _, ok := mntns.mountpoints[from]; ok { - vfs.mountMu.RUnlock() - return syserror.EBUSY - } - if to != nil { - if _, ok := mntns.mountpoints[to]; ok { - vfs.mountMu.RUnlock() - return syserror.EBUSY - } - } - // Return with vfs.mountMu locked, which will be unlocked by - // AbortRenameDentry, CommitRenameReplaceDentry, or - // CommitRenameExchangeDentry. - return nil -} - -// AbortRenameDentry must be called after PrepareRenameDentry if the rename -// fails. -func (vfs *VirtualFilesystem) AbortRenameDentry() { - vfs.mountMu.RUnlock() -} - -// CommitRenameReplaceDentry must be called after the file represented by from -// is renamed without RENAME_EXCHANGE. If to is not nil, it represents the file -// that was replaced by from. -// -// Preconditions: PrepareRenameDentry was previously called on from and to. -// newParent.Child(newName) == to. -func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(from, newParent *Dentry, newName string, to *Dentry) { - if to != nil { - to.setDisowned() - // TODO: lazily unmount mounts at d - } - if newParent.children == nil { - newParent.children = make(map[string]*Dentry) - } - newParent.children[newName] = from - from.parent = newParent - from.name = newName - vfs.mountMu.RUnlock() -} - -// CommitRenameExchangeDentry must be called after the files represented by -// from and to are exchanged by rename(RENAME_EXCHANGE). -// -// Preconditions: PrepareRenameDentry was previously called on from and to. -func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) { - from.parent, to.parent = to.parent, from.parent - from.name, to.name = to.name, from.name - from.parent.children[from.name] = from - to.parent.children[to.name] = to - vfs.mountMu.RUnlock() -} diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go deleted file mode 100644 index 86bde7fb3..000000000 --- a/pkg/sentry/vfs/file_description.go +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -// A FileDescription represents an open file description, which is the entity -// referred to by a file descriptor (POSIX.1-2017 3.258 "Open File -// Description"). -// -// FileDescriptions are reference-counted. Unless otherwise specified, all -// FileDescription methods require that a reference is held. -// -// FileDescription is analogous to Linux's struct file. -type FileDescription struct { - // refs is the reference count. refs is accessed using atomic memory - // operations. - refs int64 - - // vd is the filesystem location at which this FileDescription was opened. - // A reference is held on vd. vd is immutable. - vd VirtualDentry - - // impl is the FileDescriptionImpl associated with this Filesystem. impl is - // immutable. This should be the last field in FileDescription. - impl FileDescriptionImpl -} - -// Init must be called before first use of fd. It takes references on mnt and -// d. -func (fd *FileDescription) Init(impl FileDescriptionImpl, mnt *Mount, d *Dentry) { - fd.refs = 1 - fd.vd = VirtualDentry{ - mount: mnt, - dentry: d, - } - fd.vd.IncRef() - fd.impl = impl -} - -// Impl returns the FileDescriptionImpl associated with fd. -func (fd *FileDescription) Impl() FileDescriptionImpl { - return fd.impl -} - -// VirtualDentry returns the location at which fd was opened. It does not take -// a reference on the returned VirtualDentry. -func (fd *FileDescription) VirtualDentry() VirtualDentry { - return fd.vd -} - -// IncRef increments fd's reference count. -func (fd *FileDescription) IncRef() { - atomic.AddInt64(&fd.refs, 1) -} - -// DecRef decrements fd's reference count. -func (fd *FileDescription) DecRef() { - if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 { - fd.impl.Release() - fd.vd.DecRef() - } else if refs < 0 { - panic("FileDescription.DecRef() called without holding a reference") - } -} - -// FileDescriptionImpl contains implementation details for an FileDescription. -// Implementations of FileDescriptionImpl should contain their associated -// FileDescription by value as their first field. -// -// For all functions that return linux.Statx, Statx.Uid and Statx.Gid will -// be interpreted as IDs in the root UserNamespace (i.e. as auth.KUID and -// auth.KGID respectively). -// -// FileDescriptionImpl is analogous to Linux's struct file_operations. -type FileDescriptionImpl interface { - // Release is called when the associated FileDescription reaches zero - // references. - Release() - - // OnClose is called when a file descriptor representing the - // FileDescription is closed. Note that returning a non-nil error does not - // prevent the file descriptor from being closed. - OnClose() error - - // StatusFlags returns file description status flags, as for - // fcntl(F_GETFL). - StatusFlags(ctx context.Context) (uint32, error) - - // SetStatusFlags sets file description status flags, as for - // fcntl(F_SETFL). - SetStatusFlags(ctx context.Context, flags uint32) error - - // Stat returns metadata for the file represented by the FileDescription. - Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) - - // SetStat updates metadata for the file represented by the - // FileDescription. - SetStat(ctx context.Context, opts SetStatOptions) error - - // StatFS returns metadata for the filesystem containing the file - // represented by the FileDescription. - StatFS(ctx context.Context) (linux.Statfs, error) - - // waiter.Waitable methods may be used to poll for I/O events. - waiter.Waitable - - // PRead reads from the file into dst, starting at the given offset, and - // returns the number of bytes read. PRead is permitted to return partial - // reads with a nil error. - PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) - - // Read is similar to PRead, but does not specify an offset. - // - // For files with an implicit FileDescription offset (e.g. regular files), - // Read begins at the FileDescription offset, and advances the offset by - // the number of bytes read; note that POSIX 2.9.7 "Thread Interactions - // with Regular File Operations" requires that all operations that may - // mutate the FileDescription offset are serialized. - Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) - - // PWrite writes src to the file, starting at the given offset, and returns - // the number of bytes written. PWrite is permitted to return partial - // writes with a nil error. - // - // As in Linux (but not POSIX), if O_APPEND is in effect for the - // FileDescription, PWrite should ignore the offset and append data to the - // end of the file. - PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) - - // Write is similar to PWrite, but does not specify an offset, which is - // implied as for Read. - // - // Write is a FileDescriptionImpl method, instead of a wrapper around - // PWrite that uses a FileDescription offset, to make it possible for - // remote filesystems to implement O_APPEND correctly (i.e. atomically with - // respect to writers outside the scope of VFS). - Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) - - // IterDirents invokes cb on each entry in the directory represented by the - // FileDescription. If IterDirents has been called since the last call to - // Seek, it continues iteration from the end of the last call. - IterDirents(ctx context.Context, cb IterDirentsCallback) error - - // Seek changes the FileDescription offset (assuming one exists) and - // returns its new value. - // - // For directories, if whence == SEEK_SET and offset == 0, the caller is - // rewinddir(), such that Seek "shall also cause the directory stream to - // refer to the current state of the corresponding directory" - - // POSIX.1-2017. - Seek(ctx context.Context, offset int64, whence int32) (int64, error) - - // Sync requests that cached state associated with the file represented by - // the FileDescription is synchronized with persistent storage, and blocks - // until this is complete. - Sync(ctx context.Context) error - - // ConfigureMMap mutates opts to implement mmap(2) for the file. Most - // implementations that support memory mapping can call - // GenericConfigureMMap with the appropriate memmap.Mappable. - ConfigureMMap(ctx context.Context, opts memmap.MMapOpts) error - - // Ioctl implements the ioctl(2) syscall. - Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) - - // TODO: extended attributes; file locking -} - -// Dirent holds the information contained in struct linux_dirent64. -type Dirent struct { - // Name is the filename. - Name string - - // Type is the file type, a linux.DT_* constant. - Type uint8 - - // Ino is the inode number. - Ino uint64 - - // Off is this Dirent's offset. - Off int64 -} - -// IterDirentsCallback receives Dirents from FileDescriptionImpl.IterDirents. -type IterDirentsCallback interface { - // Handle handles the given iterated Dirent. It returns true if iteration - // should continue, and false if FileDescriptionImpl.IterDirents should - // terminate now and restart with the same Dirent the next time it is - // called. - Handle(dirent Dirent) bool -} diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go deleted file mode 100644 index 486893e70..000000000 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/waiter" -) - -// FileDescriptionDefaultImpl may be embedded by implementations of -// FileDescriptionImpl to obtain implementations of many FileDescriptionImpl -// methods with default behavior analogous to Linux's. -type FileDescriptionDefaultImpl struct{} - -// OnClose implements FileDescriptionImpl.OnClose analogously to -// file_operations::flush == NULL in Linux. -func (FileDescriptionDefaultImpl) OnClose() error { - return nil -} - -// StatFS implements FileDescriptionImpl.StatFS analogously to -// super_operations::statfs == NULL in Linux. -func (FileDescriptionDefaultImpl) StatFS(ctx context.Context) (linux.Statfs, error) { - return linux.Statfs{}, syserror.ENOSYS -} - -// Readiness implements waiter.Waitable.Readiness analogously to -// file_operations::poll == NULL in Linux. -func (FileDescriptionDefaultImpl) Readiness(mask waiter.EventMask) waiter.EventMask { - // include/linux/poll.h:vfs_poll() => DEFAULT_POLLMASK - return waiter.EventIn | waiter.EventOut -} - -// EventRegister implements waiter.Waitable.EventRegister analogously to -// file_operations::poll == NULL in Linux. -func (FileDescriptionDefaultImpl) EventRegister(e *waiter.Entry, mask waiter.EventMask) { -} - -// EventUnregister implements waiter.Waitable.EventUnregister analogously to -// file_operations::poll == NULL in Linux. -func (FileDescriptionDefaultImpl) EventUnregister(e *waiter.Entry) { -} - -// PRead implements FileDescriptionImpl.PRead analogously to -// file_operations::read == file_operations::read_iter == NULL in Linux. -func (FileDescriptionDefaultImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { - return 0, syserror.EINVAL -} - -// Read implements FileDescriptionImpl.Read analogously to -// file_operations::read == file_operations::read_iter == NULL in Linux. -func (FileDescriptionDefaultImpl) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { - return 0, syserror.EINVAL -} - -// PWrite implements FileDescriptionImpl.PWrite analogously to -// file_operations::write == file_operations::write_iter == NULL in Linux. -func (FileDescriptionDefaultImpl) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { - return 0, syserror.EINVAL -} - -// Write implements FileDescriptionImpl.Write analogously to -// file_operations::write == file_operations::write_iter == NULL in Linux. -func (FileDescriptionDefaultImpl) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { - return 0, syserror.EINVAL -} - -// IterDirents implements FileDescriptionImpl.IterDirents analogously to -// file_operations::iterate == file_operations::iterate_shared == NULL in -// Linux. -func (FileDescriptionDefaultImpl) IterDirents(ctx context.Context, cb IterDirentsCallback) error { - return syserror.ENOTDIR -} - -// Seek implements FileDescriptionImpl.Seek analogously to -// file_operations::llseek == NULL in Linux. -func (FileDescriptionDefaultImpl) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - return 0, syserror.ESPIPE -} - -// Sync implements FileDescriptionImpl.Sync analogously to -// file_operations::fsync == NULL in Linux. -func (FileDescriptionDefaultImpl) Sync(ctx context.Context) error { - return syserror.EINVAL -} - -// ConfigureMMap implements FileDescriptionImpl.ConfigureMMap analogously to -// file_operations::mmap == NULL in Linux. -func (FileDescriptionDefaultImpl) ConfigureMMap(ctx context.Context, opts memmap.MMapOpts) error { - return syserror.ENODEV -} - -// Ioctl implements FileDescriptionImpl.Ioctl analogously to -// file_operations::unlocked_ioctl == NULL in Linux. -func (FileDescriptionDefaultImpl) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { - return 0, syserror.ENOTTY -} - -// DirectoryFileDescriptionDefaultImpl may be embedded by implementations of -// FileDescriptionImpl that always represent directories to obtain -// implementations of non-directory I/O methods that return EISDIR, and -// implementations of other methods consistent with FileDescriptionDefaultImpl. -type DirectoryFileDescriptionDefaultImpl struct { - FileDescriptionDefaultImpl -} - -// PRead implements FileDescriptionImpl.PRead. -func (DirectoryFileDescriptionDefaultImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { - return 0, syserror.EISDIR -} - -// Read implements FileDescriptionImpl.Read. -func (DirectoryFileDescriptionDefaultImpl) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { - return 0, syserror.EISDIR -} - -// PWrite implements FileDescriptionImpl.PWrite. -func (DirectoryFileDescriptionDefaultImpl) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { - return 0, syserror.EISDIR -} - -// Write implements FileDescriptionImpl.Write. -func (DirectoryFileDescriptionDefaultImpl) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { - return 0, syserror.EISDIR -} diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go deleted file mode 100644 index 7a074b718..000000000 --- a/pkg/sentry/vfs/filesystem.go +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" -) - -// A Filesystem is a tree of nodes represented by Dentries, which forms part of -// a VirtualFilesystem. -// -// Filesystems are reference-counted. Unless otherwise specified, all -// Filesystem methods require that a reference is held. -// -// Filesystem is analogous to Linux's struct super_block. -type Filesystem struct { - // refs is the reference count. refs is accessed using atomic memory - // operations. - refs int64 - - // impl is the FilesystemImpl associated with this Filesystem. impl is - // immutable. This should be the last field in Dentry. - impl FilesystemImpl -} - -// Init must be called before first use of fs. -func (fs *Filesystem) Init(impl FilesystemImpl) { - fs.refs = 1 - fs.impl = impl -} - -// Impl returns the FilesystemImpl associated with fs. -func (fs *Filesystem) Impl() FilesystemImpl { - return fs.impl -} - -func (fs *Filesystem) incRef() { - if atomic.AddInt64(&fs.refs, 1) <= 1 { - panic("Filesystem.incRef() called without holding a reference") - } -} - -func (fs *Filesystem) decRef() { - if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 { - fs.impl.Release() - } else if refs < 0 { - panic("Filesystem.decRef() called without holding a reference") - } -} - -// FilesystemImpl contains implementation details for a Filesystem. -// Implementations of FilesystemImpl should contain their associated Filesystem -// by value as their first field. -// -// All methods that take a ResolvingPath must resolve the path before -// performing any other checks, including rejection of the operation if not -// supported by the FilesystemImpl. This is because the final FilesystemImpl -// (responsible for actually implementing the operation) isn't known until path -// resolution is complete. -// -// For all methods that take or return linux.Statx, Statx.Uid and Statx.Gid -// should be interpreted as IDs in the root UserNamespace (i.e. as auth.KUID -// and auth.KGID respectively). -// -// FilesystemImpl combines elements of Linux's struct super_operations and -// struct inode_operations, for reasons described in the documentation for -// Dentry. -type FilesystemImpl interface { - // Release is called when the associated Filesystem reaches zero - // references. - Release() - - // Sync "causes all pending modifications to filesystem metadata and cached - // file data to be written to the underlying [filesystem]", as by syncfs(2). - Sync(ctx context.Context) error - - // GetDentryAt returns a Dentry representing the file at rp. A reference is - // taken on the returned Dentry. - // - // GetDentryAt does not correspond directly to a Linux syscall; it is used - // in the implementation of: - // - // - Syscalls that need to resolve two paths: rename(), renameat(), - // renameat2(), link(), linkat(). - // - // - Syscalls that need to refer to a filesystem position outside the - // context of a file description: chdir(), fchdir(), chroot(), mount(), - // umount(). - GetDentryAt(ctx context.Context, rp *ResolvingPath, opts GetDentryOptions) (*Dentry, error) - - // LinkAt creates a hard link at rp representing the same file as vd. It - // does not take ownership of references on vd. - // - // The implementation is responsible for checking that vd.Mount() == - // rp.Mount(), and that vd does not represent a directory. - LinkAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry) error - - // MkdirAt creates a directory at rp. - MkdirAt(ctx context.Context, rp *ResolvingPath, opts MkdirOptions) error - - // MknodAt creates a regular file, device special file, or named pipe at - // rp. - MknodAt(ctx context.Context, rp *ResolvingPath, opts MknodOptions) error - - // OpenAt returns an FileDescription providing access to the file at rp. A - // reference is taken on the returned FileDescription. - OpenAt(ctx context.Context, rp *ResolvingPath, opts OpenOptions) (*FileDescription, error) - - // ReadlinkAt returns the target of the symbolic link at rp. - ReadlinkAt(ctx context.Context, rp *ResolvingPath) (string, error) - - // RenameAt renames the Dentry represented by vd to rp. It does not take - // ownership of references on vd. - // - // The implementation is responsible for checking that vd.Mount() == - // rp.Mount(). - RenameAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry, opts RenameOptions) error - - // RmdirAt removes the directory at rp. - RmdirAt(ctx context.Context, rp *ResolvingPath) error - - // SetStatAt updates metadata for the file at the given path. - SetStatAt(ctx context.Context, rp *ResolvingPath, opts SetStatOptions) error - - // StatAt returns metadata for the file at rp. - StatAt(ctx context.Context, rp *ResolvingPath, opts StatOptions) (linux.Statx, error) - - // StatFSAt returns metadata for the filesystem containing the file at rp. - // (This method takes a path because a FilesystemImpl may consist of any - // number of constituent filesystems.) - StatFSAt(ctx context.Context, rp *ResolvingPath) (linux.Statfs, error) - - // SymlinkAt creates a symbolic link at rp referring to the given target. - SymlinkAt(ctx context.Context, rp *ResolvingPath, target string) error - - // UnlinkAt removes the non-directory file at rp. - UnlinkAt(ctx context.Context, rp *ResolvingPath) error - - // TODO: d_path(); extended attributes; inotify_add_watch(); bind() -} diff --git a/pkg/sentry/vfs/filesystem_type.go b/pkg/sentry/vfs/filesystem_type.go deleted file mode 100644 index f401ad7f3..000000000 --- a/pkg/sentry/vfs/filesystem_type.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" -) - -// A FilesystemType constructs filesystems. -// -// FilesystemType is analogous to Linux's struct file_system_type. -type FilesystemType interface { - // NewFilesystem returns a Filesystem configured by the given options, - // along with its mount root. A reference is taken on the returned - // Filesystem and Dentry. - NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts NewFilesystemOptions) (*Filesystem, *Dentry, error) -} - -// NewFilesystemOptions contains options to FilesystemType.NewFilesystem. -type NewFilesystemOptions struct { - // Data is the string passed as the 5th argument to mount(2), which is - // usually a comma-separated list of filesystem-specific mount options. - Data string - - // InternalData holds opaque FilesystemType-specific data. There is - // intentionally no way for applications to specify InternalData; if it is - // not nil, the call to NewFilesystem originates from within the sentry. - InternalData interface{} -} - -// RegisterFilesystemType registers the given FilesystemType in vfs with the -// given name. -func (vfs *VirtualFilesystem) RegisterFilesystemType(name string, fsType FilesystemType) error { - vfs.fsTypesMu.Lock() - defer vfs.fsTypesMu.Unlock() - if existing, ok := vfs.fsTypes[name]; ok { - return fmt.Errorf("name %q is already registered to filesystem type %T", name, existing) - } - vfs.fsTypes[name] = fsType - return nil -} - -// MustRegisterFilesystemType is equivalent to RegisterFilesystemType but -// panics on failure. -func (vfs *VirtualFilesystem) MustRegisterFilesystemType(name string, fsType FilesystemType) { - if err := vfs.RegisterFilesystemType(name, fsType); err != nil { - panic(fmt.Sprintf("failed to register filesystem type %T: %v", fsType, err)) - } -} - -func (vfs *VirtualFilesystem) getFilesystemType(name string) FilesystemType { - vfs.fsTypesMu.RLock() - defer vfs.fsTypesMu.RUnlock() - return vfs.fsTypes[name] -} diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go deleted file mode 100644 index 11702f720..000000000 --- a/pkg/sentry/vfs/mount.go +++ /dev/null @@ -1,411 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "math" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" -) - -// A Mount is a replacement of a Dentry (Mount.key.point) from one Filesystem -// (Mount.key.parent.fs) with a Dentry (Mount.root) from another Filesystem -// (Mount.fs), which applies to path resolution in the context of a particular -// Mount (Mount.key.parent). -// -// Mounts are reference-counted. Unless otherwise specified, all Mount methods -// require that a reference is held. -// -// Mount and Filesystem are distinct types because it's possible for a single -// Filesystem to be mounted at multiple locations and/or in multiple mount -// namespaces. -// -// Mount is analogous to Linux's struct mount. (gVisor does not distinguish -// between struct mount and struct vfsmount.) -type Mount struct { - // The lower 63 bits of refs are a reference count. The MSB of refs is set - // if the Mount has been eagerly unmounted, as by umount(2) without the - // MNT_DETACH flag. refs is accessed using atomic memory operations. - refs int64 - - // The lower 63 bits of writers is the number of calls to - // Mount.CheckBeginWrite() that have not yet been paired with a call to - // Mount.EndWrite(). The MSB of writers is set if MS_RDONLY is in effect. - // writers is accessed using atomic memory operations. - writers int64 - - // key is protected by VirtualFilesystem.mountMu and - // VirtualFilesystem.mounts.seq, and may be nil. References are held on - // key.parent and key.point if they are not nil. - // - // Invariant: key.parent != nil iff key.point != nil. key.point belongs to - // key.parent.fs. - key mountKey - - // fs, root, and ns are immutable. References are held on fs and root (but - // not ns). - // - // Invariant: root belongs to fs. - fs *Filesystem - root *Dentry - ns *MountNamespace -} - -// A MountNamespace is a collection of Mounts. -// -// MountNamespaces are reference-counted. Unless otherwise specified, all -// MountNamespace methods require that a reference is held. -// -// MountNamespace is analogous to Linux's struct mnt_namespace. -type MountNamespace struct { - refs int64 // accessed using atomic memory operations - - // root is the MountNamespace's root mount. root is immutable. - root *Mount - - // mountpoints contains all Dentries which are mount points in this - // namespace. mountpoints is protected by VirtualFilesystem.mountMu. - // - // mountpoints is used to determine if a Dentry can be moved or removed - // (which requires that the Dentry is not a mount point in the calling - // namespace). - // - // mountpoints is maintained even if there are no references held on the - // MountNamespace; this is required to ensure that - // VFS.PrepareDeleteDentry() and VFS.PrepareRemoveDentry() operate - // correctly on unreferenced MountNamespaces. - mountpoints map[*Dentry]struct{} -} - -// NewMountNamespace returns a new mount namespace with a root filesystem -// configured by the given arguments. A reference is taken on the returned -// MountNamespace. -func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth.Credentials, source, fsTypeName string, opts *NewFilesystemOptions) (*MountNamespace, error) { - fsType := vfs.getFilesystemType(fsTypeName) - if fsType == nil { - return nil, syserror.ENODEV - } - fs, root, err := fsType.NewFilesystem(ctx, creds, source, *opts) - if err != nil { - return nil, err - } - mntns := &MountNamespace{ - refs: 1, - mountpoints: make(map[*Dentry]struct{}), - } - mntns.root = &Mount{ - fs: fs, - root: root, - ns: mntns, - refs: 1, - } - return mntns, nil -} - -// NewMount creates and mounts a new Filesystem. -func (vfs *VirtualFilesystem) NewMount(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *NewFilesystemOptions) error { - fsType := vfs.getFilesystemType(fsTypeName) - if fsType == nil { - return syserror.ENODEV - } - fs, root, err := fsType.NewFilesystem(ctx, creds, source, *opts) - if err != nil { - return err - } - // We can't hold vfs.mountMu while calling FilesystemImpl methods due to - // lock ordering. - vd, err := vfs.GetDentryAt(ctx, creds, target, &GetDentryOptions{}) - if err != nil { - root.decRef(fs) - fs.decRef() - return err - } - vfs.mountMu.Lock() - for { - if vd.dentry.IsDisowned() { - vfs.mountMu.Unlock() - vd.DecRef() - root.decRef(fs) - fs.decRef() - return syserror.ENOENT - } - // vd might have been mounted over between vfs.GetDentryAt() and - // vfs.mountMu.Lock(). - if !vd.dentry.isMounted() { - break - } - nextmnt := vfs.mounts.Lookup(vd.mount, vd.dentry) - if nextmnt == nil { - break - } - nextmnt.incRef() - nextmnt.root.incRef(nextmnt.fs) - vd.DecRef() - vd = VirtualDentry{ - mount: nextmnt, - dentry: nextmnt.root, - } - } - // TODO: Linux requires that either both the mount point and the mount root - // are directories, or neither are, and returns ENOTDIR if this is not the - // case. - mntns := vd.mount.ns - mnt := &Mount{ - fs: fs, - root: root, - ns: mntns, - refs: 1, - } - mnt.storeKey(vd.mount, vd.dentry) - atomic.AddUint32(&vd.dentry.mounts, 1) - mntns.mountpoints[vd.dentry] = struct{}{} - vfsmpmounts, ok := vfs.mountpoints[vd.dentry] - if !ok { - vfsmpmounts = make(map[*Mount]struct{}) - vfs.mountpoints[vd.dentry] = vfsmpmounts - } - vfsmpmounts[mnt] = struct{}{} - vfs.mounts.Insert(mnt) - vfs.mountMu.Unlock() - return nil -} - -// getMountAt returns the last Mount in the stack mounted at (mnt, d). It takes -// a reference on the returned Mount. If (mnt, d) is not a mount point, -// getMountAt returns nil. -// -// getMountAt is analogous to Linux's fs/namei.c:follow_mount(). -// -// Preconditions: References are held on mnt and d. -func (vfs *VirtualFilesystem) getMountAt(mnt *Mount, d *Dentry) *Mount { - // The first mount is special-cased: - // - // - The caller is assumed to have checked d.isMounted() already. (This - // isn't a precondition because it doesn't matter for correctness.) - // - // - We return nil, instead of mnt, if there is no mount at (mnt, d). - // - // - We don't drop the caller's references on mnt and d. -retryFirst: - next := vfs.mounts.Lookup(mnt, d) - if next == nil { - return nil - } - if !next.tryIncMountedRef() { - // Raced with umount. - goto retryFirst - } - mnt = next - d = next.root - // We don't need to take Dentry refs anywhere in this function because - // Mounts hold references on Mount.root, which is immutable. - for d.isMounted() { - next := vfs.mounts.Lookup(mnt, d) - if next == nil { - break - } - if !next.tryIncMountedRef() { - // Raced with umount. - continue - } - mnt.decRef() - mnt = next - d = next.root - } - return mnt -} - -// getMountpointAt returns the mount point for the stack of Mounts including -// mnt. It takes a reference on the returned Mount and Dentry. If no such mount -// point exists (i.e. mnt is a root mount), getMountpointAt returns (nil, nil). -// -// Preconditions: References are held on mnt and root. vfsroot is not (mnt, -// mnt.root). -func (vfs *VirtualFilesystem) getMountpointAt(mnt *Mount, vfsroot VirtualDentry) (*Mount, *Dentry) { - // The first mount is special-cased: - // - // - The caller must have already checked mnt against vfsroot. - // - // - We return nil, instead of mnt, if there is no mount point for mnt. - // - // - We don't drop the caller's reference on mnt. -retryFirst: - epoch := vfs.mounts.seq.BeginRead() - parent, point := mnt.loadKey() - if !vfs.mounts.seq.ReadOk(epoch) { - goto retryFirst - } - if parent == nil { - return nil, nil - } - if !parent.tryIncMountedRef() { - // Raced with umount. - goto retryFirst - } - if !point.tryIncRef(parent.fs) { - // Since Mount holds a reference on Mount.key.point, this can only - // happen due to a racing change to Mount.key. - parent.decRef() - goto retryFirst - } - mnt = parent - d := point - for { - if mnt == vfsroot.mount && d == vfsroot.dentry { - break - } - if d != mnt.root { - break - } - retryNotFirst: - epoch := vfs.mounts.seq.BeginRead() - parent, point := mnt.loadKey() - if !vfs.mounts.seq.ReadOk(epoch) { - goto retryNotFirst - } - if parent == nil { - break - } - if !parent.tryIncMountedRef() { - // Raced with umount. - goto retryNotFirst - } - if !point.tryIncRef(parent.fs) { - // Since Mount holds a reference on Mount.key.point, this can - // only happen due to a racing change to Mount.key. - parent.decRef() - goto retryNotFirst - } - if !vfs.mounts.seq.ReadOk(epoch) { - point.decRef(parent.fs) - parent.decRef() - goto retryNotFirst - } - d.decRef(mnt.fs) - mnt.decRef() - mnt = parent - d = point - } - return mnt, d -} - -// tryIncMountedRef increments mnt's reference count and returns true. If mnt's -// reference count is already zero, or has been eagerly unmounted, -// tryIncMountedRef does nothing and returns false. -// -// tryIncMountedRef does not require that a reference is held on mnt. -func (mnt *Mount) tryIncMountedRef() bool { - for { - refs := atomic.LoadInt64(&mnt.refs) - if refs <= 0 { // refs < 0 => MSB set => eagerly unmounted - return false - } - if atomic.CompareAndSwapInt64(&mnt.refs, refs, refs+1) { - return true - } - } -} - -func (mnt *Mount) incRef() { - // In general, negative values for mnt.refs are valid because the MSB is - // the eager-unmount bit. - atomic.AddInt64(&mnt.refs, 1) -} - -func (mnt *Mount) decRef() { - refs := atomic.AddInt64(&mnt.refs, -1) - if refs&^math.MinInt64 == 0 { // mask out MSB - parent, point := mnt.loadKey() - if point != nil { - point.decRef(parent.fs) - parent.decRef() - } - mnt.root.decRef(mnt.fs) - mnt.fs.decRef() - } -} - -// CheckBeginWrite increments the counter of in-progress write operations on -// mnt. If mnt is mounted MS_RDONLY, CheckBeginWrite does nothing and returns -// EROFS. -// -// If CheckBeginWrite succeeds, EndWrite must be called when the write -// operation is finished. -func (mnt *Mount) CheckBeginWrite() error { - if atomic.AddInt64(&mnt.writers, 1) < 0 { - atomic.AddInt64(&mnt.writers, -1) - return syserror.EROFS - } - return nil -} - -// EndWrite indicates that a write operation signaled by a previous successful -// call to CheckBeginWrite has finished. -func (mnt *Mount) EndWrite() { - atomic.AddInt64(&mnt.writers, -1) -} - -// Preconditions: VirtualFilesystem.mountMu must be locked for writing. -func (mnt *Mount) setReadOnlyLocked(ro bool) error { - if oldRO := atomic.LoadInt64(&mnt.writers) < 0; oldRO == ro { - return nil - } - if ro { - if !atomic.CompareAndSwapInt64(&mnt.writers, 0, math.MinInt64) { - return syserror.EBUSY - } - return nil - } - // Unset MSB without dropping any temporary increments from failed calls to - // mnt.CheckBeginWrite(). - atomic.AddInt64(&mnt.writers, math.MinInt64) - return nil -} - -// Filesystem returns the mounted Filesystem. It does not take a reference on -// the returned Filesystem. -func (mnt *Mount) Filesystem() *Filesystem { - return mnt.fs -} - -// IncRef increments mntns' reference count. -func (mntns *MountNamespace) IncRef() { - if atomic.AddInt64(&mntns.refs, 1) <= 1 { - panic("MountNamespace.IncRef() called without holding a reference") - } -} - -// DecRef decrements mntns' reference count. -func (mntns *MountNamespace) DecRef() { - if refs := atomic.AddInt64(&mntns.refs, 0); refs == 0 { - // TODO: unmount mntns.root - } else if refs < 0 { - panic("MountNamespace.DecRef() called without holding a reference") - } -} - -// Root returns mntns' root. A reference is taken on the returned -// VirtualDentry. -func (mntns *MountNamespace) Root() VirtualDentry { - vd := VirtualDentry{ - mount: mntns.root, - dentry: mntns.root.root, - } - vd.IncRef() - return vd -} diff --git a/pkg/sentry/vfs/mount_test.go b/pkg/sentry/vfs/mount_test.go deleted file mode 100644 index f394d7483..000000000 --- a/pkg/sentry/vfs/mount_test.go +++ /dev/null @@ -1,465 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "fmt" - "runtime" - "sync" - "testing" -) - -func TestMountTableLookupEmpty(t *testing.T) { - var mt mountTable - mt.Init() - - parent := &Mount{} - point := &Dentry{} - if m := mt.Lookup(parent, point); m != nil { - t.Errorf("empty mountTable lookup: got %p, wanted nil", m) - } -} - -func TestMountTableInsertLookup(t *testing.T) { - var mt mountTable - mt.Init() - - mount := &Mount{} - mount.storeKey(&Mount{}, &Dentry{}) - mt.Insert(mount) - - if m := mt.Lookup(mount.parent(), mount.point()); m != mount { - t.Errorf("mountTable positive lookup: got %p, wanted %p", m, mount) - } - - otherParent := &Mount{} - if m := mt.Lookup(otherParent, mount.point()); m != nil { - t.Errorf("mountTable lookup with wrong mount parent: got %p, wanted nil", m) - } - otherPoint := &Dentry{} - if m := mt.Lookup(mount.parent(), otherPoint); m != nil { - t.Errorf("mountTable lookup with wrong mount point: got %p, wanted nil", m) - } -} - -// TODO: concurrent lookup/insertion/removal - -// must be powers of 2 -var benchNumMounts = []int{1 << 2, 1 << 5, 1 << 8} - -// For all of the following: -// -// - BenchmarkMountTableFoo tests usage pattern "Foo" for mountTable. -// -// - BenchmarkMountMapFoo tests usage pattern "Foo" for a -// sync.RWMutex-protected map. (Mutator benchmarks do not use a RWMutex, since -// mountTable also requires external synchronization between mutators.) -// -// - BenchmarkMountSyncMapFoo tests usage pattern "Foo" for a sync.Map. -// -// ParallelLookup is by far the most common and performance-sensitive operation -// for this application. NegativeLookup is also important, but less so (only -// relevant with multiple mount namespaces and significant differences in -// mounts between them). Insertion and removal are benchmarked for -// completeness. -const enableComparativeBenchmarks = false - -func newBenchMount() *Mount { - mount := &Mount{} - mount.storeKey(&Mount{}, &Dentry{}) - return mount -} - -func vdkey(mnt *Mount) VirtualDentry { - parent, point := mnt.loadKey() - return VirtualDentry{ - mount: parent, - dentry: point, - } -} - -func BenchmarkMountTableParallelLookup(b *testing.B) { - for numG, maxG := 1, runtime.GOMAXPROCS(0); numG >= 0 && numG <= maxG; numG *= 2 { - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%dx%d", numG, numMounts) - b.Run(desc, func(b *testing.B) { - var mt mountTable - mt.Init() - keys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - mount := newBenchMount() - mt.Insert(mount) - keys = append(keys, vdkey(mount)) - } - - var ready sync.WaitGroup - begin := make(chan struct{}) - var end sync.WaitGroup - for g := 0; g < numG; g++ { - ready.Add(1) - end.Add(1) - go func() { - defer end.Done() - ready.Done() - <-begin - for i := 0; i < b.N; i++ { - k := keys[i&(numMounts-1)] - m := mt.Lookup(k.mount, k.dentry) - if m == nil { - b.Fatalf("lookup failed") - } - if parent := m.parent(); parent != k.mount { - b.Fatalf("lookup returned mount with parent %p, wanted %p", parent, k.mount) - } - if point := m.point(); point != k.dentry { - b.Fatalf("lookup returned mount with point %p, wanted %p", point, k.dentry) - } - } - }() - } - - ready.Wait() - b.ResetTimer() - close(begin) - end.Wait() - }) - } - } -} - -func BenchmarkMountMapParallelLookup(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - for numG, maxG := 1, runtime.GOMAXPROCS(0); numG >= 0 && numG <= maxG; numG *= 2 { - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%dx%d", numG, numMounts) - b.Run(desc, func(b *testing.B) { - var mu sync.RWMutex - ms := make(map[VirtualDentry]*Mount) - keys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - mount := newBenchMount() - key := vdkey(mount) - ms[key] = mount - keys = append(keys, key) - } - - var ready sync.WaitGroup - begin := make(chan struct{}) - var end sync.WaitGroup - for g := 0; g < numG; g++ { - ready.Add(1) - end.Add(1) - go func() { - defer end.Done() - ready.Done() - <-begin - for i := 0; i < b.N; i++ { - k := keys[i&(numMounts-1)] - mu.RLock() - m := ms[k] - mu.RUnlock() - if m == nil { - b.Fatalf("lookup failed") - } - if parent := m.parent(); parent != k.mount { - b.Fatalf("lookup returned mount with parent %p, wanted %p", parent, k.mount) - } - if point := m.point(); point != k.dentry { - b.Fatalf("lookup returned mount with point %p, wanted %p", point, k.dentry) - } - } - }() - } - - ready.Wait() - b.ResetTimer() - close(begin) - end.Wait() - }) - } - } -} - -func BenchmarkMountSyncMapParallelLookup(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - for numG, maxG := 1, runtime.GOMAXPROCS(0); numG >= 0 && numG <= maxG; numG *= 2 { - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%dx%d", numG, numMounts) - b.Run(desc, func(b *testing.B) { - var ms sync.Map - keys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - mount := newBenchMount() - key := vdkey(mount) - ms.Store(key, mount) - keys = append(keys, key) - } - - var ready sync.WaitGroup - begin := make(chan struct{}) - var end sync.WaitGroup - for g := 0; g < numG; g++ { - ready.Add(1) - end.Add(1) - go func() { - defer end.Done() - ready.Done() - <-begin - for i := 0; i < b.N; i++ { - k := keys[i&(numMounts-1)] - mi, ok := ms.Load(k) - if !ok { - b.Fatalf("lookup failed") - } - m := mi.(*Mount) - if parent := m.parent(); parent != k.mount { - b.Fatalf("lookup returned mount with parent %p, wanted %p", parent, k.mount) - } - if point := m.point(); point != k.dentry { - b.Fatalf("lookup returned mount with point %p, wanted %p", point, k.dentry) - } - } - }() - } - - ready.Wait() - b.ResetTimer() - close(begin) - end.Wait() - }) - } - } -} - -func BenchmarkMountTableNegativeLookup(b *testing.B) { - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%d", numMounts) - b.Run(desc, func(b *testing.B) { - var mt mountTable - mt.Init() - for i := 0; i < numMounts; i++ { - mt.Insert(newBenchMount()) - } - negkeys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - negkeys = append(negkeys, VirtualDentry{ - mount: &Mount{}, - dentry: &Dentry{}, - }) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - k := negkeys[i&(numMounts-1)] - m := mt.Lookup(k.mount, k.dentry) - if m != nil { - b.Fatalf("lookup got %p, wanted nil", m) - } - } - }) - } -} - -func BenchmarkMountMapNegativeLookup(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%d", numMounts) - b.Run(desc, func(b *testing.B) { - var mu sync.RWMutex - ms := make(map[VirtualDentry]*Mount) - for i := 0; i < numMounts; i++ { - mount := newBenchMount() - ms[vdkey(mount)] = mount - } - negkeys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - negkeys = append(negkeys, VirtualDentry{ - mount: &Mount{}, - dentry: &Dentry{}, - }) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - k := negkeys[i&(numMounts-1)] - mu.RLock() - m := ms[k] - mu.RUnlock() - if m != nil { - b.Fatalf("lookup got %p, wanted nil", m) - } - } - }) - } -} - -func BenchmarkMountSyncMapNegativeLookup(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - for _, numMounts := range benchNumMounts { - desc := fmt.Sprintf("%d", numMounts) - b.Run(desc, func(b *testing.B) { - var ms sync.Map - for i := 0; i < numMounts; i++ { - mount := newBenchMount() - ms.Store(vdkey(mount), mount) - } - negkeys := make([]VirtualDentry, 0, numMounts) - for i := 0; i < numMounts; i++ { - negkeys = append(negkeys, VirtualDentry{ - mount: &Mount{}, - dentry: &Dentry{}, - }) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - k := negkeys[i&(numMounts-1)] - m, _ := ms.Load(k) - if m != nil { - b.Fatalf("lookup got %p, wanted nil", m) - } - } - }) - } -} - -func BenchmarkMountTableInsert(b *testing.B) { - // Preallocate Mounts so that allocation time isn't included in the - // benchmark. - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - - var mt mountTable - mt.Init() - b.ResetTimer() - for i := range mounts { - mt.Insert(mounts[i]) - } -} - -func BenchmarkMountMapInsert(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - // Preallocate Mounts so that allocation time isn't included in the - // benchmark. - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - - ms := make(map[VirtualDentry]*Mount) - b.ResetTimer() - for i := range mounts { - mount := mounts[i] - ms[vdkey(mount)] = mount - } -} - -func BenchmarkMountSyncMapInsert(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - // Preallocate Mounts so that allocation time isn't included in the - // benchmark. - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - - var ms sync.Map - b.ResetTimer() - for i := range mounts { - mount := mounts[i] - ms.Store(vdkey(mount), mount) - } -} - -func BenchmarkMountTableRemove(b *testing.B) { - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - var mt mountTable - mt.Init() - for i := range mounts { - mt.Insert(mounts[i]) - } - - b.ResetTimer() - for i := range mounts { - mt.Remove(mounts[i]) - } -} - -func BenchmarkMountMapRemove(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - ms := make(map[VirtualDentry]*Mount) - for i := range mounts { - mount := mounts[i] - ms[vdkey(mount)] = mount - } - - b.ResetTimer() - for i := range mounts { - mount := mounts[i] - delete(ms, vdkey(mount)) - } -} - -func BenchmarkMountSyncMapRemove(b *testing.B) { - if !enableComparativeBenchmarks { - b.Skipf("comparative benchmarks are disabled") - } - - mounts := make([]*Mount, 0, b.N) - for i := 0; i < b.N; i++ { - mounts = append(mounts, newBenchMount()) - } - var ms sync.Map - for i := range mounts { - mount := mounts[i] - ms.Store(vdkey(mount), mount) - } - - b.ResetTimer() - for i := range mounts { - mount := mounts[i] - ms.Delete(vdkey(mount)) - } -} diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go deleted file mode 100644 index b0511aa40..000000000 --- a/pkg/sentry/vfs/mount_unsafe.go +++ /dev/null @@ -1,356 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build go1.12 -// +build !go1.14 - -// Check go:linkname function signatures when updating Go version. - -package vfs - -import ( - "fmt" - "math/bits" - "reflect" - "sync/atomic" - "unsafe" - - "gvisor.dev/gvisor/third_party/gvsync" -) - -// mountKey represents the location at which a Mount is mounted. It is -// structurally identical to VirtualDentry, but stores its fields as -// unsafe.Pointer since mutators synchronize with VFS path traversal using -// seqcounts. -type mountKey struct { - parent unsafe.Pointer // *Mount - point unsafe.Pointer // *Dentry -} - -// Invariant: mnt.key's fields are nil. parent and point are non-nil. -func (mnt *Mount) storeKey(parent *Mount, point *Dentry) { - atomic.StorePointer(&mnt.key.parent, unsafe.Pointer(parent)) - atomic.StorePointer(&mnt.key.point, unsafe.Pointer(point)) -} - -func (mnt *Mount) loadKey() (*Mount, *Dentry) { - return (*Mount)(atomic.LoadPointer(&mnt.key.parent)), (*Dentry)(atomic.LoadPointer(&mnt.key.point)) -} - -func (mnt *Mount) parent() *Mount { - return (*Mount)(atomic.LoadPointer(&mnt.key.parent)) -} - -func (mnt *Mount) point() *Dentry { - return (*Dentry)(atomic.LoadPointer(&mnt.key.point)) -} - -// mountTable maps (mount parent, mount point) pairs to mounts. It supports -// efficient concurrent lookup, even in the presence of concurrent mutators -// (provided mutation is sufficiently uncommon). -// -// mountTable.Init() must be called on new mountTables before use. -type mountTable struct { - // mountTable is implemented as a seqcount-protected hash table that - // resolves collisions with linear probing, featuring Robin Hood insertion - // and backward shift deletion. These minimize probe length variance, - // significantly improving the performance of linear probing at high load - // factors. (mountTable doesn't use bucketing, which is the other major - // technique commonly used in high-performance hash tables; the efficiency - // of bucketing is largely due to SIMD lookup, and Go lacks both SIMD - // intrinsics and inline assembly, limiting the performance of this - // approach.) - - seq gvsync.SeqCount - seed uint32 // for hashing keys - - // size holds both length (number of elements) and capacity (number of - // slots): capacity is stored as its base-2 log (referred to as order) in - // the least significant bits of size, and length is stored in the - // remaining bits. Go defines bit shifts >= width of shifted unsigned - // operand as shifting to 0, which differs from x86's SHL, so the Go - // compiler inserts a bounds check for each bit shift unless we mask order - // anyway (cf. runtime.bucketShift()), and length isn't used by lookup; - // thus this bit packing gets us more bits for the length (vs. storing - // length and cap in separate uint32s) for ~free. - size uint64 - - slots unsafe.Pointer // []mountSlot; never nil after Init -} - -type mountSlot struct { - // We don't store keys in slots; instead, we just check Mount.parent and - // Mount.point directly. Any practical use of lookup will need to touch - // Mounts anyway, and comparing hashes means that false positives are - // extremely rare, so this isn't an extra cache line touch overall. - value unsafe.Pointer // *Mount - hash uintptr -} - -const ( - mtSizeOrderBits = 6 // log2 of pointer size in bits - mtSizeOrderMask = (1 << mtSizeOrderBits) - 1 - mtSizeOrderOne = 1 - mtSizeLenLSB = mtSizeOrderBits - mtSizeLenOne = 1 << mtSizeLenLSB - mtSizeLenNegOne = ^uint64(mtSizeOrderMask) // uint64(-1) << mtSizeLenLSB - - mountSlotBytes = unsafe.Sizeof(mountSlot{}) - mountKeyBytes = unsafe.Sizeof(mountKey{}) - - // Tuning parameters. - // - // Essentially every mountTable will contain at least /proc, /sys, and - // /dev/shm, so there is ~no reason for mtInitCap to be < 4. - mtInitOrder = 2 - mtInitCap = 1 << mtInitOrder - mtMaxLoadNum = 13 - mtMaxLoadDen = 16 -) - -func init() { - // We can't just define mtSizeOrderBits as follows because Go doesn't have - // constexpr. - if ptrBits := uint(unsafe.Sizeof(uintptr(0)) * 8); mtSizeOrderBits != bits.TrailingZeros(ptrBits) { - panic(fmt.Sprintf("mtSizeOrderBits (%d) must be %d = log2 of pointer size in bits (%d)", mtSizeOrderBits, bits.TrailingZeros(ptrBits), ptrBits)) - } - if bits.OnesCount(uint(mountSlotBytes)) != 1 { - panic(fmt.Sprintf("sizeof(mountSlotBytes) (%d) must be a power of 2 to use bit masking for wraparound", mountSlotBytes)) - } - if mtInitCap <= 1 { - panic(fmt.Sprintf("mtInitCap (%d) must be at least 2 since mountTable methods assume that there will always be at least one empty slot", mtInitCap)) - } - if mtMaxLoadNum >= mtMaxLoadDen { - panic(fmt.Sprintf("invalid mountTable maximum load factor (%d/%d)", mtMaxLoadNum, mtMaxLoadDen)) - } -} - -// Init must be called exactly once on each mountTable before use. -func (mt *mountTable) Init() { - mt.seed = rand32() - mt.size = mtInitOrder - mt.slots = newMountTableSlots(mtInitCap) -} - -func newMountTableSlots(cap uintptr) unsafe.Pointer { - slice := make([]mountSlot, cap, cap) - hdr := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) - return unsafe.Pointer(hdr.Data) -} - -// Lookup returns the Mount with the given parent, mounted at the given point. -// If no such Mount exists, Lookup returns nil. -// -// Lookup may be called even if there are concurrent mutators of mt. -func (mt *mountTable) Lookup(parent *Mount, point *Dentry) *Mount { - key := mountKey{parent: unsafe.Pointer(parent), point: unsafe.Pointer(point)} - hash := memhash(noescape(unsafe.Pointer(&key)), uintptr(mt.seed), mountKeyBytes) - -loop: - for { - epoch := mt.seq.BeginRead() - size := atomic.LoadUint64(&mt.size) - slots := atomic.LoadPointer(&mt.slots) - if !mt.seq.ReadOk(epoch) { - continue - } - tcap := uintptr(1) << (size & mtSizeOrderMask) - mask := tcap - 1 - off := (hash & mask) * mountSlotBytes - offmask := mask * mountSlotBytes - for { - // This avoids bounds checking. - slot := (*mountSlot)(unsafe.Pointer(uintptr(slots) + off)) - slotValue := atomic.LoadPointer(&slot.value) - slotHash := atomic.LoadUintptr(&slot.hash) - if !mt.seq.ReadOk(epoch) { - // The element we're looking for might have been moved into a - // slot we've previously checked, so restart entirely. - continue loop - } - if slotValue == nil { - return nil - } - if slotHash == hash { - mount := (*Mount)(slotValue) - var mountKey mountKey - mountKey.parent = atomic.LoadPointer(&mount.key.parent) - mountKey.point = atomic.LoadPointer(&mount.key.point) - if !mt.seq.ReadOk(epoch) { - continue loop - } - if key == mountKey { - return mount - } - } - off = (off + mountSlotBytes) & offmask - } - } -} - -// Insert inserts the given mount into mt. -// -// Preconditions: There are no concurrent mutators of mt. mt must not already -// contain a Mount with the same mount point and parent. -func (mt *mountTable) Insert(mount *Mount) { - hash := memhash(unsafe.Pointer(&mount.key), uintptr(mt.seed), mountKeyBytes) - - // We're under the maximum load factor if: - // - // (len+1) / cap <= mtMaxLoadNum / mtMaxLoadDen - // (len+1) * mtMaxLoadDen <= mtMaxLoadNum * cap - tlen := mt.size >> mtSizeLenLSB - order := mt.size & mtSizeOrderMask - tcap := uintptr(1) << order - if ((tlen + 1) * mtMaxLoadDen) <= (uint64(mtMaxLoadNum) << order) { - // Atomically insert the new element into the table. - mt.seq.BeginWrite() - atomic.AddUint64(&mt.size, mtSizeLenOne) - mtInsertLocked(mt.slots, tcap, unsafe.Pointer(mount), hash) - mt.seq.EndWrite() - return - } - - // Otherwise, we have to expand. Double the number of slots in the new - // table. - newOrder := order + 1 - if newOrder > mtSizeOrderMask { - panic("mount table size overflow") - } - newCap := uintptr(1) << newOrder - newSlots := newMountTableSlots(newCap) - // Copy existing elements to the new table. - oldCur := mt.slots - // Go does not permit pointers to the end of allocated objects, so we - // must use a pointer to the last element of the old table. The - // following expression is equivalent to - // `slots+(cap-1)*mountSlotBytes` but has a critical path length of 2 - // arithmetic instructions instead of 3. - oldLast := unsafe.Pointer((uintptr(mt.slots) - mountSlotBytes) + (tcap * mountSlotBytes)) - for { - oldSlot := (*mountSlot)(oldCur) - if oldSlot.value != nil { - // Don't need to lock mt.seq yet since newSlots isn't visible - // to readers. - mtInsertLocked(newSlots, newCap, oldSlot.value, oldSlot.hash) - } - if oldCur == oldLast { - break - } - oldCur = unsafe.Pointer(uintptr(oldCur) + mountSlotBytes) - } - // Insert the new element into the new table. - mtInsertLocked(newSlots, newCap, unsafe.Pointer(mount), hash) - // Atomically switch to the new table. - mt.seq.BeginWrite() - atomic.AddUint64(&mt.size, mtSizeLenOne|mtSizeOrderOne) - atomic.StorePointer(&mt.slots, newSlots) - mt.seq.EndWrite() -} - -// Preconditions: There are no concurrent mutators of the table (slots, cap). -// If the table is visible to readers, then mt.seq must be in a writer critical -// section. cap must be a power of 2. -func mtInsertLocked(slots unsafe.Pointer, cap uintptr, value unsafe.Pointer, hash uintptr) { - mask := cap - 1 - off := (hash & mask) * mountSlotBytes - offmask := mask * mountSlotBytes - disp := uintptr(0) - for { - slot := (*mountSlot)(unsafe.Pointer(uintptr(slots) + off)) - slotValue := slot.value - if slotValue == nil { - atomic.StorePointer(&slot.value, value) - atomic.StoreUintptr(&slot.hash, hash) - return - } - // If we've been displaced farther from our first-probed slot than the - // element stored in this one, swap elements and switch to inserting - // the replaced one. (This is Robin Hood insertion.) - slotHash := slot.hash - slotDisp := ((off / mountSlotBytes) - slotHash) & mask - if disp > slotDisp { - atomic.StorePointer(&slot.value, value) - atomic.StoreUintptr(&slot.hash, hash) - value = slotValue - hash = slotHash - disp = slotDisp - } - off = (off + mountSlotBytes) & offmask - disp++ - } -} - -// Remove removes the given mount from mt. -// -// Preconditions: There are no concurrent mutators of mt. mt must contain -// mount. -func (mt *mountTable) Remove(mount *Mount) { - hash := memhash(unsafe.Pointer(&mount.key), uintptr(mt.seed), mountKeyBytes) - tcap := uintptr(1) << (mt.size & mtSizeOrderMask) - mask := tcap - 1 - slots := mt.slots - off := (hash & mask) * mountSlotBytes - offmask := mask * mountSlotBytes - for { - slot := (*mountSlot)(unsafe.Pointer(uintptr(slots) + off)) - slotValue := slot.value - if slotValue == unsafe.Pointer(mount) { - // Found the element to remove. Move all subsequent elements - // backward until we either find an empty slot, or an element that - // is already in its first-probed slot. (This is backward shift - // deletion.) - mt.seq.BeginWrite() - for { - nextOff := (off + mountSlotBytes) & offmask - nextSlot := (*mountSlot)(unsafe.Pointer(uintptr(slots) + nextOff)) - nextSlotValue := nextSlot.value - if nextSlotValue == nil { - break - } - nextSlotHash := nextSlot.hash - if (nextOff / mountSlotBytes) == (nextSlotHash & mask) { - break - } - atomic.StorePointer(&slot.value, nextSlotValue) - atomic.StoreUintptr(&slot.hash, nextSlotHash) - off = nextOff - slot = nextSlot - } - atomic.StorePointer(&slot.value, nil) - atomic.AddUint64(&mt.size, mtSizeLenNegOne) - mt.seq.EndWrite() - return - } - if checkInvariants && slotValue == nil { - panic(fmt.Sprintf("mountTable.Remove() called on missing Mount %v", mount)) - } - off = (off + mountSlotBytes) & offmask - } -} - -//go:linkname memhash runtime.memhash -func memhash(p unsafe.Pointer, seed, s uintptr) uintptr - -//go:linkname rand32 runtime.fastrand -func rand32() uint32 - -// This is copy/pasted from runtime.noescape(), and is needed because arguments -// apparently escape from all functions defined by linkname. -// -//go:nosplit -func noescape(p unsafe.Pointer) unsafe.Pointer { - x := uintptr(p) - return unsafe.Pointer(x ^ 0) -} diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go deleted file mode 100644 index 187e5410c..000000000 --- a/pkg/sentry/vfs/options.go +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" -) - -// GetDentryOptions contains options to VirtualFilesystem.GetDentryAt() and -// FilesystemImpl.GetDentryAt(). -type GetDentryOptions struct { - // If CheckSearchable is true, FilesystemImpl.GetDentryAt() must check that - // the returned Dentry is a directory for which creds has search - // permission. - CheckSearchable bool -} - -// MkdirOptions contains options to VirtualFilesystem.MkdirAt() and -// FilesystemImpl.MkdirAt(). -type MkdirOptions struct { - // Mode is the file mode bits for the created directory. - Mode uint16 -} - -// MknodOptions contains options to VirtualFilesystem.MknodAt() and -// FilesystemImpl.MknodAt(). -type MknodOptions struct { - // Mode is the file type and mode bits for the created file. - Mode uint16 - - // If Mode specifies a character or block device special file, DevMajor and - // DevMinor are the major and minor device numbers for the created device. - DevMajor uint32 - DevMinor uint32 -} - -// OpenOptions contains options to VirtualFilesystem.OpenAt() and -// FilesystemImpl.OpenAt(). -type OpenOptions struct { - // Flags contains access mode and flags as specified for open(2). - // - // FilesystemImpls is reponsible for implementing the following flags: - // O_RDONLY, O_WRONLY, O_RDWR, O_APPEND, O_CREAT, O_DIRECT, O_DSYNC, - // O_EXCL, O_NOATIME, O_NOCTTY, O_NONBLOCK, O_PATH, O_SYNC, O_TMPFILE, and - // O_TRUNC. VFS is responsible for handling O_DIRECTORY, O_LARGEFILE, and - // O_NOFOLLOW. VFS users are responsible for handling O_CLOEXEC, since file - // descriptors are mostly outside the scope of VFS. - Flags uint32 - - // If FilesystemImpl.OpenAt() creates a file, Mode is the file mode for the - // created file. - Mode uint16 -} - -// ReadOptions contains options to FileDescription.PRead(), -// FileDescriptionImpl.PRead(), FileDescription.Read(), and -// FileDescriptionImpl.Read(). -type ReadOptions struct { - // Flags contains flags as specified for preadv2(2). - Flags uint32 -} - -// RenameOptions contains options to VirtualFilesystem.RenameAt() and -// FilesystemImpl.RenameAt(). -type RenameOptions struct { - // Flags contains flags as specified for renameat2(2). - Flags uint32 -} - -// SetStatOptions contains options to VirtualFilesystem.SetStatAt(), -// FilesystemImpl.SetStatAt(), FileDescription.SetStat(), and -// FileDescriptionImpl.SetStat(). -type SetStatOptions struct { - // Stat is the metadata that should be set. Only fields indicated by - // Stat.Mask should be set. - // - // If Stat specifies that a timestamp should be set, - // FilesystemImpl.SetStatAt() and FileDescriptionImpl.SetStat() must - // special-case StatxTimestamp.Nsec == UTIME_NOW as described by - // utimensat(2); however, they do not need to check for StatxTimestamp.Nsec - // == UTIME_OMIT (VFS users must unset the corresponding bit in Stat.Mask - // instead). - Stat linux.Statx -} - -// StatOptions contains options to VirtualFilesystem.StatAt(), -// FilesystemImpl.StatAt(), FileDescription.Stat(), and -// FileDescriptionImpl.Stat(). -type StatOptions struct { - // Mask is the set of fields in the returned Statx that the FilesystemImpl - // or FileDescriptionImpl should provide. Bits are as in linux.Statx.Mask. - // - // The FilesystemImpl or FileDescriptionImpl may return fields not - // requested in Mask, and may fail to return fields requested in Mask that - // are not supported by the underlying filesystem implementation, without - // returning an error. - Mask uint32 - - // Sync specifies the synchronization required, and is one of - // linux.AT_STATX_SYNC_AS_STAT (which is 0, and therefore the default), - // linux.AT_STATX_SYNC_FORCE_SYNC, or linux.AT_STATX_SYNC_DONT_SYNC. - Sync uint32 -} - -// WriteOptions contains options to FileDescription.PWrite(), -// FileDescriptionImpl.PWrite(), FileDescription.Write(), and -// FileDescriptionImpl.Write(). -type WriteOptions struct { - // Flags contains flags as specified for pwritev2(2). - Flags uint32 -} diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go deleted file mode 100644 index f8e74355c..000000000 --- a/pkg/sentry/vfs/permissions.go +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" -) - -// AccessTypes is a bitmask of Unix file permissions. -type AccessTypes uint16 - -// Bits in AccessTypes. -const ( - MayRead AccessTypes = 4 - MayWrite = 2 - MayExec = 1 -) - -// GenericCheckPermissions checks that creds has the given access rights on a -// file with the given permissions, UID, and GID, subject to the rules of -// fs/namei.c:generic_permission(). isDir is true if the file is a directory. -func GenericCheckPermissions(creds *auth.Credentials, ats AccessTypes, isDir bool, mode uint16, kuid auth.KUID, kgid auth.KGID) error { - // Check permission bits. - perms := mode - if creds.EffectiveKUID == kuid { - perms >>= 6 - } else if creds.InGroup(kgid) { - perms >>= 3 - } - if uint16(ats)&perms == uint16(ats) { - return nil - } - - // Caller capabilities require that the file's KUID and KGID are mapped in - // the caller's user namespace; compare - // kernel/capability.c:privileged_wrt_inode_uidgid(). - if !kuid.In(creds.UserNamespace).Ok() || !kgid.In(creds.UserNamespace).Ok() { - return syserror.EACCES - } - // CAP_DAC_READ_SEARCH allows the caller to read and search arbitrary - // directories, and read arbitrary non-directory files. - if (isDir && (ats&MayWrite == 0)) || ats == MayRead { - if creds.HasCapability(linux.CAP_DAC_READ_SEARCH) { - return nil - } - } - // CAP_DAC_OVERRIDE allows arbitrary access to directories, read/write - // access to non-directory files, and execute access to non-directory files - // for which at least one execute bit is set. - if isDir || (ats&MayExec == 0) || (mode&0111 != 0) { - if creds.HasCapability(linux.CAP_DAC_OVERRIDE) { - return nil - } - } - return syserror.EACCES -} - -// AccessTypesForOpenFlags returns the access types required to open a file -// with the given OpenOptions.Flags. Note that this is NOT the same thing as -// the set of accesses permitted for the opened file: -// -// - O_TRUNC causes MayWrite to be set in the returned AccessTypes (since it -// mutates the file), but does not permit the opened to write to the file -// thereafter. -// -// - "Linux reserves the special, nonstandard access mode 3 (binary 11) in -// flags to mean: check for read and write permission on the file and return a -// file descriptor that can't be used for reading or writing." - open(2). Thus -// AccessTypesForOpenFlags returns MayRead|MayWrite in this case, but -// filesystems are responsible for ensuring that access is denied. -// -// Use May{Read,Write}FileWithOpenFlags() for these checks instead. -func AccessTypesForOpenFlags(flags uint32) AccessTypes { - switch flags & linux.O_ACCMODE { - case linux.O_RDONLY: - if flags&linux.O_TRUNC != 0 { - return MayRead | MayWrite - } - return MayRead - case linux.O_WRONLY: - return MayWrite - default: - return MayRead | MayWrite - } -} - -// MayReadFileWithOpenFlags returns true if a file with the given open flags -// should be readable. -func MayReadFileWithOpenFlags(flags uint32) bool { - switch flags & linux.O_ACCMODE { - case linux.O_RDONLY, linux.O_RDWR: - return true - default: - return false - } -} - -// MayWriteFileWithOpenFlags returns true if a file with the given open flags -// should be writable. -func MayWriteFileWithOpenFlags(flags uint32) bool { - switch flags & linux.O_ACCMODE { - case linux.O_WRONLY, linux.O_RDWR: - return true - default: - return false - } -} diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go deleted file mode 100644 index 8d05c8583..000000000 --- a/pkg/sentry/vfs/resolving_path.go +++ /dev/null @@ -1,453 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "fmt" - "sync" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" -) - -// ResolvingPath represents the state of an in-progress path resolution, shared -// between VFS and FilesystemImpl methods that take a path. -// -// From the perspective of FilesystemImpl methods, a ResolvingPath represents a -// starting Dentry on the associated Filesystem (on which a reference is -// already held) and a stream of path components relative to that Dentry. -// -// ResolvingPath is loosely analogous to Linux's struct nameidata. -type ResolvingPath struct { - vfs *VirtualFilesystem - root VirtualDentry // refs borrowed from PathOperation - mount *Mount - start *Dentry - pit fspath.Iterator - - flags uint16 - mustBeDir bool // final file must be a directory? - mustBeDirOrig bool - symlinks uint8 // number of symlinks traversed - symlinksOrig uint8 - curPart uint8 // index into parts - numOrigParts uint8 - - creds *auth.Credentials - - // Data associated with resolve*Errors, stored in ResolvingPath so that - // those errors don't need to allocate. - nextMount *Mount // ref held if not nil - nextStart *Dentry // ref held if not nil - absSymlinkTarget fspath.Path - - // ResolvingPath must track up to two relative paths: the "current" - // relative path, which is updated whenever a relative symlink is - // encountered, and the "original" relative path, which is updated from the - // current relative path by handleError() when resolution must change - // filesystems (due to reaching a mount boundary or absolute symlink) and - // overwrites the current relative path when Restart() is called. - parts [1 + linux.MaxSymlinkTraversals]fspath.Iterator - origParts [1 + linux.MaxSymlinkTraversals]fspath.Iterator -} - -const ( - rpflagsHaveMountRef = 1 << iota // do we hold a reference on mount? - rpflagsHaveStartRef // do we hold a reference on start? - rpflagsFollowFinalSymlink // same as PathOperation.FollowFinalSymlink -) - -func init() { - if maxParts := len(ResolvingPath{}.parts); maxParts > 255 { - panic(fmt.Sprintf("uint8 is insufficient to accommodate len(ResolvingPath.parts) (%d)", maxParts)) - } -} - -// Error types that communicate state from the FilesystemImpl-caller, -// VFS-callee side of path resolution (i.e. errors returned by -// ResolvingPath.Resolve*()) to the VFS-caller, FilesystemImpl-callee side -// (i.e. VFS methods => ResolvingPath.handleError()). These are empty structs -// rather than error values because Go doesn't support non-primitive constants, -// so error "constants" are really mutable vars, necessitating somewhat -// expensive interface object comparisons. - -type resolveMountRootError struct{} - -// Error implements error.Error. -func (resolveMountRootError) Error() string { - return "resolving mount root" -} - -type resolveMountPointError struct{} - -// Error implements error.Error. -func (resolveMountPointError) Error() string { - return "resolving mount point" -} - -type resolveAbsSymlinkError struct{} - -// Error implements error.Error. -func (resolveAbsSymlinkError) Error() string { - return "resolving absolute symlink" -} - -var resolvingPathPool = sync.Pool{ - New: func() interface{} { - return &ResolvingPath{} - }, -} - -func (vfs *VirtualFilesystem) getResolvingPath(creds *auth.Credentials, pop *PathOperation) (*ResolvingPath, error) { - path, err := fspath.Parse(pop.Pathname) - if err != nil { - return nil, err - } - rp := resolvingPathPool.Get().(*ResolvingPath) - rp.vfs = vfs - rp.root = pop.Root - rp.mount = pop.Start.mount - rp.start = pop.Start.dentry - rp.pit = path.Begin - rp.flags = 0 - if pop.FollowFinalSymlink { - rp.flags |= rpflagsFollowFinalSymlink - } - rp.mustBeDir = path.Dir - rp.mustBeDirOrig = path.Dir - rp.symlinks = 0 - rp.curPart = 0 - rp.numOrigParts = 1 - rp.creds = creds - rp.parts[0] = path.Begin - rp.origParts[0] = path.Begin - return rp, nil -} - -func (vfs *VirtualFilesystem) putResolvingPath(rp *ResolvingPath) { - rp.root = VirtualDentry{} - rp.decRefStartAndMount() - rp.mount = nil - rp.start = nil - rp.releaseErrorState() - resolvingPathPool.Put(rp) -} - -func (rp *ResolvingPath) decRefStartAndMount() { - if rp.flags&rpflagsHaveStartRef != 0 { - rp.start.decRef(rp.mount.fs) - } - if rp.flags&rpflagsHaveMountRef != 0 { - rp.mount.decRef() - } -} - -func (rp *ResolvingPath) releaseErrorState() { - if rp.nextStart != nil { - rp.nextStart.decRef(rp.nextMount.fs) - rp.nextStart = nil - } - if rp.nextMount != nil { - rp.nextMount.decRef() - rp.nextMount = nil - } -} - -// VirtualFilesystem returns the containing VirtualFilesystem. -func (rp *ResolvingPath) VirtualFilesystem() *VirtualFilesystem { - return rp.vfs -} - -// Credentials returns the credentials of rp's provider. -func (rp *ResolvingPath) Credentials() *auth.Credentials { - return rp.creds -} - -// Mount returns the Mount on which path resolution is currently occurring. It -// does not take a reference on the returned Mount. -func (rp *ResolvingPath) Mount() *Mount { - return rp.mount -} - -// Start returns the starting Dentry represented by rp. It does not take a -// reference on the returned Dentry. -func (rp *ResolvingPath) Start() *Dentry { - return rp.start -} - -// Done returns true if there are no remaining path components in the stream -// represented by rp. -func (rp *ResolvingPath) Done() bool { - // We don't need to check for rp.curPart == 0 because rp.Advance() won't - // set rp.pit to a terminal iterator otherwise. - return !rp.pit.Ok() -} - -// Final returns true if there is exactly one remaining path component in the -// stream represented by rp. -// -// Preconditions: !rp.Done(). -func (rp *ResolvingPath) Final() bool { - return rp.curPart == 0 && !rp.pit.NextOk() -} - -// Component returns the current path component in the stream represented by -// rp. -// -// Preconditions: !rp.Done(). -func (rp *ResolvingPath) Component() string { - if checkInvariants { - if !rp.pit.Ok() { - panic("ResolvingPath.Component() called at end of relative path") - } - } - return rp.pit.String() -} - -// Advance advances the stream of path components represented by rp. -// -// Preconditions: !rp.Done(). -func (rp *ResolvingPath) Advance() { - if checkInvariants { - if !rp.pit.Ok() { - panic("ResolvingPath.Advance() called at end of relative path") - } - } - next := rp.pit.Next() - if next.Ok() || rp.curPart == 0 { // have next component, or at end of path - rp.pit = next - } else { // at end of path segment, continue with next one - rp.curPart-- - rp.pit = rp.parts[rp.curPart-1] - } -} - -// Restart resets the stream of path components represented by rp to its state -// on entry to the current FilesystemImpl method. -func (rp *ResolvingPath) Restart() { - rp.pit = rp.origParts[rp.numOrigParts-1] - rp.mustBeDir = rp.mustBeDirOrig - rp.symlinks = rp.symlinksOrig - rp.curPart = rp.numOrigParts - 1 - copy(rp.parts[:], rp.origParts[:rp.numOrigParts]) - rp.releaseErrorState() -} - -func (rp *ResolvingPath) relpathCommit() { - rp.mustBeDirOrig = rp.mustBeDir - rp.symlinksOrig = rp.symlinks - rp.numOrigParts = rp.curPart + 1 - copy(rp.origParts[:rp.curPart], rp.parts[:]) - rp.origParts[rp.curPart] = rp.pit -} - -// ResolveParent returns the VFS parent of d. It does not take a reference on -// the returned Dentry. -// -// Preconditions: There are no concurrent mutators of d. -// -// Postconditions: If the returned error is nil, then the returned Dentry is -// not nil. -func (rp *ResolvingPath) ResolveParent(d *Dentry) (*Dentry, error) { - var parent *Dentry - if d == rp.root.dentry && rp.mount == rp.root.mount { - // At contextual VFS root. - parent = d - } else if d == rp.mount.root { - // At mount root ... - mnt, mntpt := rp.vfs.getMountpointAt(rp.mount, rp.root) - if mnt != nil { - // ... of non-root mount. - rp.nextMount = mnt - rp.nextStart = mntpt - return nil, resolveMountRootError{} - } - // ... of root mount. - parent = d - } else if d.parent == nil { - // At filesystem root. - parent = d - } else { - parent = d.parent - } - if parent.isMounted() { - if mnt := rp.vfs.getMountAt(rp.mount, parent); mnt != nil { - rp.nextMount = mnt - return nil, resolveMountPointError{} - } - } - return parent, nil -} - -// ResolveChild returns the VFS child of d with the given name. It does not -// take a reference on the returned Dentry. If no such child exists, -// ResolveChild returns (nil, nil). -// -// Preconditions: There are no concurrent mutators of d. -func (rp *ResolvingPath) ResolveChild(d *Dentry, name string) (*Dentry, error) { - child := d.children[name] - if child == nil { - return nil, nil - } - if child.isMounted() { - if mnt := rp.vfs.getMountAt(rp.mount, child); mnt != nil { - rp.nextMount = mnt - return nil, resolveMountPointError{} - } - } - return child, nil -} - -// ResolveComponent returns the Dentry reached by starting at d and resolving -// the current path component in the stream represented by rp. It does not -// advance the stream. It does not take a reference on the returned Dentry. If -// no such Dentry exists, ResolveComponent returns (nil, nil). -// -// Preconditions: !rp.Done(). There are no concurrent mutators of d. -func (rp *ResolvingPath) ResolveComponent(d *Dentry) (*Dentry, error) { - switch pc := rp.Component(); pc { - case ".": - return d, nil - case "..": - return rp.ResolveParent(d) - default: - return rp.ResolveChild(d, pc) - } -} - -// ShouldFollowSymlink returns true if, supposing that the current path -// component in pcs represents a symbolic link, the symbolic link should be -// followed. -// -// Preconditions: !rp.Done(). -func (rp *ResolvingPath) ShouldFollowSymlink() bool { - // Non-final symlinks are always followed. - return rp.flags&rpflagsFollowFinalSymlink != 0 || !rp.Final() -} - -// HandleSymlink is called when the current path component is a symbolic link -// to the given target. If the calling Filesystem method should continue path -// traversal, HandleSymlink updates the path component stream to reflect the -// symlink target and returns nil. Otherwise it returns a non-nil error. -// -// Preconditions: !rp.Done(). -func (rp *ResolvingPath) HandleSymlink(target string) error { - if rp.symlinks >= linux.MaxSymlinkTraversals { - return syserror.ELOOP - } - targetPath, err := fspath.Parse(target) - if err != nil { - return err - } - rp.symlinks++ - if targetPath.Absolute { - rp.absSymlinkTarget = targetPath - return resolveAbsSymlinkError{} - } - if !targetPath.Begin.Ok() { - panic(fmt.Sprintf("symbolic link has non-empty target %q that is both relative and has no path components?", target)) - } - // Consume the path component that represented the symlink. - rp.Advance() - // Prepend the symlink target to the relative path. - rp.relpathPrepend(targetPath) - return nil -} - -func (rp *ResolvingPath) relpathPrepend(path fspath.Path) { - if rp.pit.Ok() { - rp.parts[rp.curPart] = rp.pit - rp.pit = path.Begin - rp.curPart++ - } else { - // The symlink was the final path component, so now the symlink target - // is the whole path. - rp.pit = path.Begin - // Symlink targets can set rp.mustBeDir (if they end in a trailing /), - // but can't unset it. - if path.Dir { - rp.mustBeDir = true - } - } -} - -func (rp *ResolvingPath) handleError(err error) bool { - switch err.(type) { - case resolveMountRootError: - // Switch to the new Mount. We hold references on the Mount and Dentry - // (from VFS.getMountpointAt()). - rp.decRefStartAndMount() - rp.mount = rp.nextMount - rp.start = rp.nextStart - rp.flags |= rpflagsHaveMountRef | rpflagsHaveStartRef - rp.nextMount = nil - rp.nextStart = nil - // Commit the previous FileystemImpl's progress through the relative - // path. (Don't consume the path component that caused us to traverse - // through the mount root - i.e. the ".." - because we still need to - // resolve the mount point's parent in the new FilesystemImpl.) - rp.relpathCommit() - // Restart path resolution on the new Mount. Don't bother calling - // rp.releaseErrorState() since we already set nextMount and nextStart - // to nil above. - return true - - case resolveMountPointError: - // Switch to the new Mount. We hold a reference on the Mount (from - // VFS.getMountAt()), but borrow the reference on the mount root from - // the Mount. - rp.decRefStartAndMount() - rp.mount = rp.nextMount - rp.start = rp.nextMount.root - rp.flags = rp.flags&^rpflagsHaveStartRef | rpflagsHaveMountRef - rp.nextMount = nil - // Consume the path component that represented the mount point. - rp.Advance() - // Commit the previous FilesystemImpl's progress through the relative - // path. - rp.relpathCommit() - // Restart path resolution on the new Mount. - rp.releaseErrorState() - return true - - case resolveAbsSymlinkError: - // Switch to the new Mount. References are borrowed from rp.root. - rp.decRefStartAndMount() - rp.mount = rp.root.mount - rp.start = rp.root.dentry - rp.flags &^= rpflagsHaveMountRef | rpflagsHaveStartRef - // Consume the path component that represented the symlink. - rp.Advance() - // Prepend the symlink target to the relative path. - rp.relpathPrepend(rp.absSymlinkTarget) - // Commit the previous FilesystemImpl's progress through the relative - // path, including the symlink target we just prepended. - rp.relpathCommit() - // Restart path resolution on the new Mount. - rp.releaseErrorState() - return true - - default: - // Not an error we can handle. - return false - } -} - -// MustBeDir returns true if the file traversed by rp must be a directory. -func (rp *ResolvingPath) MustBeDir() bool { - return rp.mustBeDir -} diff --git a/pkg/sentry/vfs/syscalls.go b/pkg/sentry/vfs/syscalls.go deleted file mode 100644 index 23f2b9e08..000000000 --- a/pkg/sentry/vfs/syscalls.go +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" -) - -// PathOperation specifies the path operated on by a VFS method. -// -// PathOperation is passed to VFS methods by pointer to reduce memory copying: -// it's somewhat large and should never escape. (Options structs are passed by -// pointer to VFS and FileDescription methods for the same reason.) -type PathOperation struct { - // Root is the VFS root. References on Root are borrowed from the provider - // of the PathOperation. - // - // Invariants: Root.Ok(). - Root VirtualDentry - - // Start is the starting point for the path traversal. References on Start - // are borrowed from the provider of the PathOperation (i.e. the caller of - // the VFS method to which the PathOperation was passed). - // - // Invariants: Start.Ok(). If Pathname.Absolute, then Start == Root. - Start VirtualDentry - - // Path is the pathname traversed by this operation. - Pathname string - - // If FollowFinalSymlink is true, and the Dentry traversed by the final - // path component represents a symbolic link, the symbolic link should be - // followed. - FollowFinalSymlink bool -} - -// GetDentryAt returns a VirtualDentry representing the given path, at which a -// file must exist. A reference is taken on the returned VirtualDentry. -func (vfs *VirtualFilesystem) GetDentryAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetDentryOptions) (VirtualDentry, error) { - rp, err := vfs.getResolvingPath(creds, pop) - if err != nil { - return VirtualDentry{}, err - } - for { - d, err := rp.mount.fs.impl.GetDentryAt(ctx, rp, *opts) - if err == nil { - vd := VirtualDentry{ - mount: rp.mount, - dentry: d, - } - rp.mount.incRef() - vfs.putResolvingPath(rp) - return vd, nil - } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) - return VirtualDentry{}, err - } - } -} - -// MkdirAt creates a directory at the given path. -func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MkdirOptions) error { - // "Under Linux, apart from the permission bits, the S_ISVTX mode bit is - // also honored." - mkdir(2) - opts.Mode &= 01777 - rp, err := vfs.getResolvingPath(creds, pop) - if err != nil { - return err - } - for { - err := rp.mount.fs.impl.MkdirAt(ctx, rp, *opts) - if err == nil { - vfs.putResolvingPath(rp) - return nil - } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) - return err - } - } -} - -// OpenAt returns a FileDescription providing access to the file at the given -// path. A reference is taken on the returned FileDescription. -func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *OpenOptions) (*FileDescription, error) { - // Remove: - // - // - O_LARGEFILE, which we always report in FileDescription status flags - // since only 64-bit architectures are supported at this time. - // - // - O_CLOEXEC, which affects file descriptors and therefore must be - // handled outside of VFS. - // - // - Unknown flags. - opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_NOCTTY | linux.O_TRUNC | linux.O_APPEND | linux.O_NONBLOCK | linux.O_DSYNC | linux.O_ASYNC | linux.O_DIRECT | linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NOATIME | linux.O_SYNC | linux.O_PATH | linux.O_TMPFILE - // Linux's __O_SYNC (which we call linux.O_SYNC) implies O_DSYNC. - if opts.Flags&linux.O_SYNC != 0 { - opts.Flags |= linux.O_DSYNC - } - // Linux's __O_TMPFILE (which we call linux.O_TMPFILE) must be specified - // with O_DIRECTORY and a writable access mode (to ensure that it fails on - // filesystem implementations that do not support it). - if opts.Flags&linux.O_TMPFILE != 0 { - if opts.Flags&linux.O_DIRECTORY == 0 { - return nil, syserror.EINVAL - } - if opts.Flags&linux.O_CREAT != 0 { - return nil, syserror.EINVAL - } - if opts.Flags&linux.O_ACCMODE == linux.O_RDONLY { - return nil, syserror.EINVAL - } - } - // O_PATH causes most other flags to be ignored. - if opts.Flags&linux.O_PATH != 0 { - opts.Flags &= linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_PATH - } - // "On Linux, the following bits are also honored in mode: [S_ISUID, - // S_ISGID, S_ISVTX]" - open(2) - opts.Mode &= 07777 - - if opts.Flags&linux.O_NOFOLLOW != 0 { - pop.FollowFinalSymlink = false - } - rp, err := vfs.getResolvingPath(creds, pop) - if err != nil { - return nil, err - } - if opts.Flags&linux.O_DIRECTORY != 0 { - rp.mustBeDir = true - rp.mustBeDirOrig = true - } - for { - fd, err := rp.mount.fs.impl.OpenAt(ctx, rp, *opts) - if err == nil { - vfs.putResolvingPath(rp) - return fd, nil - } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) - return nil, err - } - } -} - -// StatAt returns metadata for the file at the given path. -func (vfs *VirtualFilesystem) StatAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *StatOptions) (linux.Statx, error) { - rp, err := vfs.getResolvingPath(creds, pop) - if err != nil { - return linux.Statx{}, err - } - for { - stat, err := rp.mount.fs.impl.StatAt(ctx, rp, *opts) - if err == nil { - vfs.putResolvingPath(rp) - return stat, nil - } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) - return linux.Statx{}, err - } - } -} - -// StatusFlags returns file description status flags. -func (fd *FileDescription) StatusFlags(ctx context.Context) (uint32, error) { - flags, err := fd.impl.StatusFlags(ctx) - flags |= linux.O_LARGEFILE - return flags, err -} - -// SetStatusFlags sets file description status flags. -func (fd *FileDescription) SetStatusFlags(ctx context.Context, flags uint32) error { - return fd.impl.SetStatusFlags(ctx, flags) -} - -// TODO: -// -// - VFS.SyncAllFilesystems() for sync(2) -// -// - Something for syncfs(2) -// -// - VFS.LinkAt() -// -// - VFS.MknodAt() -// -// - VFS.ReadlinkAt() -// -// - VFS.RenameAt() -// -// - VFS.RmdirAt() -// -// - VFS.SetStatAt() -// -// - VFS.StatFSAt() -// -// - VFS.SymlinkAt() -// -// - VFS.UnlinkAt() -// -// - FileDescription.(almost everything) diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go deleted file mode 100644 index 4a8a69540..000000000 --- a/pkg/sentry/vfs/vfs.go +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package vfs implements a virtual filesystem layer. -// -// Lock order: -// -// Filesystem implementation locks -// VirtualFilesystem.mountMu -// VirtualFilesystem.fsTypesMu -package vfs - -import ( - "sync" -) - -// A VirtualFilesystem (VFS for short) combines Filesystems in trees of Mounts. -// -// There is no analogue to the VirtualFilesystem type in Linux, as the -// equivalent state in Linux is global. -type VirtualFilesystem struct { - // mountMu serializes mount mutations. - // - // mountMu is analogous to Linux's namespace_sem. - mountMu sync.RWMutex - - // mounts maps (mount parent, mount point) pairs to mounts. (Since mounts - // are uniquely namespaced, including mount parent in the key correctly - // handles both bind mounts and mount namespaces; Linux does the same.) - // Synchronization between mutators and readers is provided by mounts.seq; - // synchronization between mutators is provided by mountMu. - // - // mounts is used to follow mount points during path traversal. We use a - // single table rather than per-Dentry tables to reduce size (and therefore - // cache footprint) for the vast majority of Dentries that are not mount - // points. - // - // mounts is analogous to Linux's mount_hashtable. - mounts mountTable - - // mountpoints maps mount points to mounts at those points in all - // namespaces. mountpoints is protected by mountMu. - // - // mountpoints is used to find mounts that must be unmounted due to - // removal of a mount point Dentry from another mount namespace. ("A file - // or directory that is a mount point in one namespace that is not a mount - // point in another namespace, may be renamed, unlinked, or removed - // (rmdir(2)) in the mount namespace in which it is not a mount point - // (subject to the usual permission checks)." - mount_namespaces(7)) - // - // mountpoints is analogous to Linux's mountpoint_hashtable. - mountpoints map[*Dentry]map[*Mount]struct{} - - // fsTypes contains all FilesystemTypes that are usable in the - // VirtualFilesystem. fsTypes is protected by fsTypesMu. - fsTypesMu sync.RWMutex - fsTypes map[string]FilesystemType -} - -// New returns a new VirtualFilesystem with no mounts or FilesystemTypes. -func New() *VirtualFilesystem { - vfs := &VirtualFilesystem{ - mountpoints: make(map[*Dentry]map[*Mount]struct{}), - fsTypes: make(map[string]FilesystemType), - } - vfs.mounts.Init() - return vfs -} - -// A VirtualDentry represents a node in a VFS tree, by combining a Dentry -// (which represents a node in a Filesystem's tree) and a Mount (which -// represents the Filesystem's position in a VFS mount tree). -// -// VirtualDentry's semantics are similar to that of a Go interface object -// representing a pointer: it is a copyable value type that represents -// references to another entity. The zero value of VirtualDentry is an "empty -// VirtualDentry", directly analogous to a nil interface object. -// VirtualDentry.Ok() checks that a VirtualDentry is not zero-valued; unless -// otherwise specified, all other VirtualDentry methods require -// VirtualDentry.Ok() == true. -// -// Mounts and Dentries are reference-counted, requiring that users call -// VirtualDentry.{Inc,Dec}Ref() as appropriate. We often colloquially refer to -// references on the Mount and Dentry referred to by a VirtualDentry as -// references on the VirtualDentry itself. Unless otherwise specified, all -// VirtualDentry methods require that a reference is held on the VirtualDentry. -// -// VirtualDentry is analogous to Linux's struct path. -type VirtualDentry struct { - mount *Mount - dentry *Dentry -} - -// Ok returns true if vd is not empty. It does not require that a reference is -// held. -func (vd VirtualDentry) Ok() bool { - return vd.mount != nil -} - -// IncRef increments the reference counts on the Mount and Dentry represented -// by vd. -func (vd VirtualDentry) IncRef() { - vd.mount.incRef() - vd.dentry.incRef(vd.mount.fs) -} - -// DecRef decrements the reference counts on the Mount and Dentry represented -// by vd. -func (vd VirtualDentry) DecRef() { - vd.dentry.decRef(vd.mount.fs) - vd.mount.decRef() -} - -// Mount returns the Mount associated with vd. It does not take a reference on -// the returned Mount. -func (vd VirtualDentry) Mount() *Mount { - return vd.mount -} - -// Dentry returns the Dentry associated with vd. It does not take a reference -// on the returned Dentry. -func (vd VirtualDentry) Dentry() *Dentry { - return vd.dentry -} diff --git a/pkg/sentry/watchdog/BUILD b/pkg/sentry/watchdog/BUILD deleted file mode 100644 index 4d8435265..000000000 --- a/pkg/sentry/watchdog/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "watchdog", - srcs = ["watchdog.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/watchdog", - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "//pkg/log", - "//pkg/metric", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - ], -) diff --git a/pkg/sentry/watchdog/watchdog_state_autogen.go b/pkg/sentry/watchdog/watchdog_state_autogen.go new file mode 100755 index 000000000..530ac6a07 --- /dev/null +++ b/pkg/sentry/watchdog/watchdog_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package watchdog + |